vexor 0.19.0a1__tar.gz → 0.21.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {vexor-0.19.0a1 → vexor-0.21.0}/PKG-INFO +42 -30
  2. {vexor-0.19.0a1 → vexor-0.21.0}/README.md +41 -29
  3. {vexor-0.19.0a1 → vexor-0.21.0}/plugins/vexor/.claude-plugin/plugin.json +1 -1
  4. {vexor-0.19.0a1 → vexor-0.21.0}/plugins/vexor/skills/vexor-cli/SKILL.md +1 -0
  5. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/__init__.py +4 -2
  6. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/api.py +87 -1
  7. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/cache.py +483 -275
  8. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/cli.py +78 -5
  9. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/config.py +240 -2
  10. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/providers/gemini.py +79 -13
  11. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/providers/openai.py +79 -13
  12. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/config_service.py +14 -0
  13. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/index_service.py +285 -4
  14. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/search_service.py +235 -24
  15. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/text.py +14 -0
  16. {vexor-0.19.0a1 → vexor-0.21.0}/.gitignore +0 -0
  17. {vexor-0.19.0a1 → vexor-0.21.0}/LICENSE +0 -0
  18. {vexor-0.19.0a1 → vexor-0.21.0}/gui/README.md +0 -0
  19. {vexor-0.19.0a1 → vexor-0.21.0}/plugins/vexor/README.md +0 -0
  20. {vexor-0.19.0a1 → vexor-0.21.0}/plugins/vexor/skills/vexor-cli/references/install-vexor.md +0 -0
  21. {vexor-0.19.0a1 → vexor-0.21.0}/pyproject.toml +0 -0
  22. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/__main__.py +0 -0
  23. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/modes.py +0 -0
  24. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/output.py +0 -0
  25. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/providers/__init__.py +0 -0
  26. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/providers/local.py +0 -0
  27. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/search.py +0 -0
  28. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/__init__.py +0 -0
  29. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/cache_service.py +0 -0
  30. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/content_extract_service.py +0 -0
  31. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/init_service.py +0 -0
  32. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/js_parser.py +0 -0
  33. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/keyword_service.py +0 -0
  34. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/skill_service.py +0 -0
  35. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/services/system_service.py +0 -0
  36. {vexor-0.19.0a1 → vexor-0.21.0}/vexor/utils.py +0 -0
{vexor-0.19.0a1 → vexor-0.21.0}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: vexor
- Version: 0.19.0a1
+ Version: 0.21.0
  Summary: A vector-powered CLI for semantic search over files.
  Project-URL: Repository, https://github.com/scarletkc/vexor
  Author: scarletkc
@@ -69,9 +69,8 @@ Description-Content-Type: text/markdown

  ---

- **Vexor** is a vector-powered CLI and desktop app for semantic file search. It uses configurable embedding models and ranks results by cosine similarity.
-
- ![GUI](https://raw.githubusercontent.com/scarletkc/vexor/refs/heads/main/assets/gui_demo.png)
+ **Vexor** is a semantic search engine that builds reusable indexes over files and code.
+ It supports configurable embedding and reranking providers, and exposes the same core through a Python API, a CLI tool, and an optional desktop frontend.

  <video src="https://github.com/user-attachments/assets/4d53eefd-ab35-4232-98a7-f8dc005983a9" controls="controls" style="max-width: 600px;">
  Vexor Demo Video
@@ -98,18 +97,13 @@ vexor init
  ```
  The wizard also runs automatically on first use when no config exists.

- ### 1. Configure API Key
- ```bash
- vexor config --set-api-key "YOUR_KEY"
- ```
- Or via environment: `VEXOR_API_KEY`, `OPENAI_API_KEY`, or `GOOGLE_GENAI_API_KEY`.
-
- ### 2. Search
+ ### 1. Search
  ```bash
- vexor "api client config" # defaults to search
- vexor search "api client config" # searches current directory
+ vexor "api client config" # defaults to search current directory
  # or explicit path:
  vexor search "api client config" --path ~/projects/demo --top 5
+ # in-memory search only:
+ vexor search "api client config" --no-cache
  ```

  Vexor auto-indexes on first search. Example output:
@@ -122,7 +116,7 @@ Vexor semantic file search results
  3 0.809 ./tests/test_config_loader.py - tests for config loader
  ```

- ### 3. Explicit Index (Optional)
+ ### 2. Explicit Index (Optional)
  ```bash
  vexor index # indexes current directory
  # or explicit path:
@@ -130,6 +124,15 @@ vexor index --path ~/projects/demo --mode code
  ```
  Useful for CI warmup or when `auto_index` is disabled.

+ ## Desktop App (Experimental)
+
+ > The desktop app is experimental and not actively maintained.
+ > It may be unstable. For production use, prefer the CLI.
+
+ ![GUI](https://raw.githubusercontent.com/scarletkc/vexor/refs/heads/main/assets/gui_demo.png)
+
+ Download the desktop app from [releases](https://github.com/scarletkc/vexor/releases).
+
  ## Python API

  Vexor can also be imported and used directly from Python:
@@ -144,8 +147,19 @@ for hit in response.results:
  print(hit.path, hit.score)
  ```

- By default it reads `~/.vexor/config.json`. To ignore config and pass everything explicitly,
- set `use_config=False`.
+ By default it reads `~/.vexor/config.json`. For runtime config overrides, cache
+ controls, and per-call options, see [`docs/api/python.md`](https://github.com/scarletkc/vexor/tree/main/docs/api/python.md).
+
+ ## AI Agent Skill
+
+ This repo includes a skill for AI agents to use Vexor effectively:
+
+ ```bash
+ vexor install --skills claude # Claude Code
+ vexor install --skills codex # Codex
+ ```
+
+ Skill source: [`plugins/vexor/skills/vexor-cli`](https://github.com/scarletkc/vexor/raw/refs/heads/main/plugins/vexor/skills/vexor-cli/SKILL.md)

  ## Configuration

@@ -153,7 +167,9 @@ set `use_config=False`.
  vexor config --set-provider openai # default; also supports gemini/custom/local
  vexor config --set-model text-embedding-3-small
  vexor config --set-batch-size 0 # 0 = single request
- vexor config --set-embed-concurrency 2 # parallel embedding requests
+ vexor config --set-embed-concurrency 4 # parallel embedding requests
+ vexor config --set-extract-concurrency 4 # parallel file extraction workers
+ vexor config --set-extract-backend auto # auto|thread|process (default: auto)
  vexor config --set-auto-index true # auto-index before search (default)
  vexor config --rerank bm25 # optional BM25 rerank for top-k results
  vexor config --rerank flashrank # FlashRank rerank (requires optional extra)
@@ -175,10 +191,16 @@ FlashRank requires `pip install "vexor[flashrank]"` and caches models under `~/.

  Config stored in `~/.vexor/config.json`.

+ ### Configure API Key
+ ```bash
+ vexor config --set-api-key "YOUR_KEY"
+ ```
+ Or via environment: `VEXOR_API_KEY`, `OPENAI_API_KEY`, or `GOOGLE_GENAI_API_KEY`.
+
  ### Rerank

- Rerank reorders the semantic results with a secondary ranker. It uses 2x the requested
- `--top` as candidates (e.g., top 10 reranked to show 5).
+ Rerank reorders the semantic results with a secondary ranker. Candidate sizing uses
+ `clamp(int(--top * 2), 20, 150)`.

  Recommended defaults:
  - Keep `off` unless you want extra precision.
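For reference, the candidate sizing rule quoted above works out as follows. This is a minimal sketch assuming ordinary clamp semantics; the helper name is illustrative, not taken from the package.

```python
def rerank_candidate_count(top: int) -> int:
    # clamp(int(top * 2), 20, 150): twice the requested --top,
    # bounded below by 20 and above by 150 candidates.
    return max(20, min(int(top * 2), 150))

print(rerank_candidate_count(5))    # 20  (a small --top still gets 20 candidates)
print(rerank_candidate_count(40))   # 80
print(rerank_candidate_count(100))  # 150 (capped)
```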
@@ -285,20 +307,10 @@ Re-running `vexor index` only re-embeds changed files; >50% changes trigger full
  | `--no-respect-gitignore` | Include gitignored files |
  | `--format porcelain` | Script-friendly TSV output |
  | `--format porcelain-z` | NUL-delimited output |
+ | `--no-cache` | In-memory only; do not read/write index cache |

  Porcelain output fields: `rank`, `similarity`, `path`, `chunk_index`, `start_line`, `end_line`, `preview` (line fields are `-` when unavailable).

- ## AI Agent Skill
-
- This repo includes a skill for AI agents to use Vexor effectively:
-
- ```bash
- vexor install --skills claude # Claude Code
- vexor install --skills codex # Codex
- ```
-
- Skill source: [`plugins/vexor/skills/vexor-cli`](https://github.com/scarletkc/vexor/raw/refs/heads/main/plugins/vexor/skills/vexor-cli/SKILL.md)
-
  ## Documentation

  See [docs](https://github.com/scarletkc/vexor/tree/main/docs) for more details.
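To illustrate the porcelain contract described above, here is a hedged sketch that shells out to the CLI and splits the documented TSV fields; it assumes one tab-separated record per line, and the query string is illustrative.

```python
import subprocess

# Field order as documented above; line fields are "-" when unavailable.
FIELDS = ("rank", "similarity", "path", "chunk_index", "start_line", "end_line", "preview")

proc = subprocess.run(
    ["vexor", "search", "api client config", "--format", "porcelain"],
    capture_output=True, text=True, check=True,
)
for line in proc.stdout.splitlines():
    record = dict(zip(FIELDS, line.split("\t")))
    print(record["path"], record["similarity"])
```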
{vexor-0.19.0a1 → vexor-0.21.0}/README.md

@@ -14,9 +14,8 @@

  ---

- **Vexor** is a vector-powered CLI and desktop app for semantic file search. It uses configurable embedding models and ranks results by cosine similarity.
-
- ![GUI](https://raw.githubusercontent.com/scarletkc/vexor/refs/heads/main/assets/gui_demo.png)
+ **Vexor** is a semantic search engine that builds reusable indexes over files and code.
+ It supports configurable embedding and reranking providers, and exposes the same core through a Python API, a CLI tool, and an optional desktop frontend.

  <video src="https://github.com/user-attachments/assets/4d53eefd-ab35-4232-98a7-f8dc005983a9" controls="controls" style="max-width: 600px;">
  Vexor Demo Video
@@ -43,18 +42,13 @@ vexor init
  ```
  The wizard also runs automatically on first use when no config exists.

- ### 1. Configure API Key
- ```bash
- vexor config --set-api-key "YOUR_KEY"
- ```
- Or via environment: `VEXOR_API_KEY`, `OPENAI_API_KEY`, or `GOOGLE_GENAI_API_KEY`.
-
- ### 2. Search
+ ### 1. Search
  ```bash
- vexor "api client config" # defaults to search
- vexor search "api client config" # searches current directory
+ vexor "api client config" # defaults to search current directory
  # or explicit path:
  vexor search "api client config" --path ~/projects/demo --top 5
+ # in-memory search only:
+ vexor search "api client config" --no-cache
  ```

  Vexor auto-indexes on first search. Example output:
@@ -67,7 +61,7 @@ Vexor semantic file search results
  3 0.809 ./tests/test_config_loader.py - tests for config loader
  ```

- ### 3. Explicit Index (Optional)
+ ### 2. Explicit Index (Optional)
  ```bash
  vexor index # indexes current directory
  # or explicit path:
@@ -75,6 +69,15 @@ vexor index --path ~/projects/demo --mode code
  ```
  Useful for CI warmup or when `auto_index` is disabled.

+ ## Desktop App (Experimental)
+
+ > The desktop app is experimental and not actively maintained.
+ > It may be unstable. For production use, prefer the CLI.
+
+ ![GUI](https://raw.githubusercontent.com/scarletkc/vexor/refs/heads/main/assets/gui_demo.png)
+
+ Download the desktop app from [releases](https://github.com/scarletkc/vexor/releases).
+
  ## Python API

  Vexor can also be imported and used directly from Python:
@@ -89,8 +92,19 @@ for hit in response.results:
  print(hit.path, hit.score)
  ```

- By default it reads `~/.vexor/config.json`. To ignore config and pass everything explicitly,
- set `use_config=False`.
+ By default it reads `~/.vexor/config.json`. For runtime config overrides, cache
+ controls, and per-call options, see [`docs/api/python.md`](https://github.com/scarletkc/vexor/tree/main/docs/api/python.md).
+
+ ## AI Agent Skill
+
+ This repo includes a skill for AI agents to use Vexor effectively:
+
+ ```bash
+ vexor install --skills claude # Claude Code
+ vexor install --skills codex # Codex
+ ```
+
+ Skill source: [`plugins/vexor/skills/vexor-cli`](https://github.com/scarletkc/vexor/raw/refs/heads/main/plugins/vexor/skills/vexor-cli/SKILL.md)

  ## Configuration

@@ -98,7 +112,9 @@ set `use_config=False`.
  vexor config --set-provider openai # default; also supports gemini/custom/local
  vexor config --set-model text-embedding-3-small
  vexor config --set-batch-size 0 # 0 = single request
- vexor config --set-embed-concurrency 2 # parallel embedding requests
+ vexor config --set-embed-concurrency 4 # parallel embedding requests
+ vexor config --set-extract-concurrency 4 # parallel file extraction workers
+ vexor config --set-extract-backend auto # auto|thread|process (default: auto)
  vexor config --set-auto-index true # auto-index before search (default)
  vexor config --rerank bm25 # optional BM25 rerank for top-k results
  vexor config --rerank flashrank # FlashRank rerank (requires optional extra)
@@ -120,10 +136,16 @@ FlashRank requires `pip install "vexor[flashrank]"` and caches models under `~/.

  Config stored in `~/.vexor/config.json`.

+ ### Configure API Key
+ ```bash
+ vexor config --set-api-key "YOUR_KEY"
+ ```
+ Or via environment: `VEXOR_API_KEY`, `OPENAI_API_KEY`, or `GOOGLE_GENAI_API_KEY`.
+
  ### Rerank

- Rerank reorders the semantic results with a secondary ranker. It uses 2x the requested
- `--top` as candidates (e.g., top 10 reranked to show 5).
+ Rerank reorders the semantic results with a secondary ranker. Candidate sizing uses
+ `clamp(int(--top * 2), 20, 150)`.

  Recommended defaults:
  - Keep `off` unless you want extra precision.
@@ -230,20 +252,10 @@ Re-running `vexor index` only re-embeds changed files; >50% changes trigger full
  | `--no-respect-gitignore` | Include gitignored files |
  | `--format porcelain` | Script-friendly TSV output |
  | `--format porcelain-z` | NUL-delimited output |
+ | `--no-cache` | In-memory only; do not read/write index cache |

  Porcelain output fields: `rank`, `similarity`, `path`, `chunk_index`, `start_line`, `end_line`, `preview` (line fields are `-` when unavailable).

- ## AI Agent Skill
-
- This repo includes a skill for AI agents to use Vexor effectively:
-
- ```bash
- vexor install --skills claude # Claude Code
- vexor install --skills codex # Codex
- ```
-
- Skill source: [`plugins/vexor/skills/vexor-cli`](https://github.com/scarletkc/vexor/raw/refs/heads/main/plugins/vexor/skills/vexor-cli/SKILL.md)
-
  ## Documentation

  See [docs](https://github.com/scarletkc/vexor/tree/main/docs) for more details.
{vexor-0.19.0a1 → vexor-0.21.0}/plugins/vexor/.claude-plugin/plugin.json

@@ -1,6 +1,6 @@
  {
  "name": "vexor",
- "version": "0.19.0a1",
+ "version": "0.21.0",
  "description": "A vector-powered CLI for semantic search over files (Vexor skill bundle).",
  "author": {
  "name": "scarletkc"
{vexor-0.19.0a1 → vexor-0.21.0}/plugins/vexor/skills/vexor-cli/SKILL.md

@@ -31,6 +31,7 @@ vexor "<QUERY>" [--path <ROOT>] [--mode <MODE>] [--ext .py,.md] [--exclude-patte
  - `--no-respect-gitignore`: include ignored files
  - `--no-recursive`: only the top directory
  - `--format`: `rich` (default) or `porcelain`/`porcelain-z` for scripts
+ - `--no-cache`: in-memory only, do not read/write index cache

  ## Modes (pick the cheapest that works)

{vexor-0.19.0a1 → vexor-0.21.0}/vexor/__init__.py

@@ -2,7 +2,7 @@

  from __future__ import annotations

- from .api import VexorError, clear_index, index, search
+ from .api import VexorError, clear_index, index, search, set_config_json, set_data_dir

  __all__ = [
      "__version__",
@@ -11,9 +11,11 @@ __all__ = [
      "get_version",
      "index",
      "search",
+     "set_config_json",
+     "set_data_dir",
  ]

- __version__ = "0.19.0a1"
+ __version__ = "0.21.0"


  def get_version() -> str:
{vexor-0.19.0a1 → vexor-0.21.0}/vexor/api.py

@@ -4,6 +4,7 @@ from __future__ import annotations

  from dataclasses import dataclass
  from pathlib import Path
+ from collections.abc import Mapping
  from typing import Sequence

  from .config import (
@@ -13,9 +14,12 @@ from .config import (
      Config,
      RemoteRerankConfig,
      SUPPORTED_RERANKERS,
+     config_from_json,
      load_config,
      resolve_default_model,
+     set_config_dir,
  )
+ from .cache import set_cache_dir
  from .modes import available_modes, get_strategy
  from .services.index_service import IndexResult, build_index, clear_index_entries
  from .services.search_service import SearchRequest, SearchResponse, perform_search
@@ -38,6 +42,8 @@ class RuntimeSettings:
      model_name: str
      batch_size: int
      embed_concurrency: int
+     extract_concurrency: int
+     extract_backend: str
      base_url: str | None
      api_key: str | None
      local_cuda: bool
@@ -47,6 +53,30 @@
      remote_rerank: RemoteRerankConfig | None


+ _RUNTIME_CONFIG: Config | None = None
+
+
+ def set_data_dir(path: Path | str | None) -> None:
+     """Set the base directory for config and cache data."""
+     set_config_dir(path)
+     set_cache_dir(path)
+
+
+ def set_config_json(
+     payload: Mapping[str, object] | str | None, *, replace: bool = False
+ ) -> None:
+     """Set in-memory config for API calls from a JSON string or mapping."""
+     global _RUNTIME_CONFIG
+     if payload is None:
+         _RUNTIME_CONFIG = None
+         return
+     base = None if replace else (_RUNTIME_CONFIG or load_config())
+     try:
+         _RUNTIME_CONFIG = config_from_json(payload, base=base)
+     except ValueError as exc:
+         raise VexorError(str(exc)) from exc
+
+
  def search(
      query: str,
      *,
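As a usage note for the two setters added in this hunk (both re-exported at package level per the `vexor/__init__.py` diff above), here is a minimal sketch. The directory path and the JSON keys are illustrative assumptions; the exact fields accepted by `config_from_json` are not shown in this diff.

```python
import vexor

# Point config and cache storage at an alternative base directory (path is illustrative).
vexor.set_data_dir("/tmp/vexor-data")

# Install an in-memory config for subsequent API calls, merged onto the loaded
# config; replace=True would build it from the payload alone (base=None).
vexor.set_config_json({"provider": "openai", "embed_concurrency": 4})

# Passing None clears the in-memory override again.
vexor.set_config_json(None)
```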
@@ -62,11 +92,16 @@
      model: str | None = None,
      batch_size: int | None = None,
      embed_concurrency: int | None = None,
+     extract_concurrency: int | None = None,
+     extract_backend: str | None = None,
      base_url: str | None = None,
      api_key: str | None = None,
      local_cuda: bool | None = None,
      auto_index: bool | None = None,
      use_config: bool = True,
+     config: Config | Mapping[str, object] | str | None = None,
+     temporary_index: bool = False,
+     no_cache: bool = False,
  ) -> SearchResponse:
      """Run a semantic search and return ranked results."""

@@ -90,11 +125,15 @@
          model=model,
          batch_size=batch_size,
          embed_concurrency=embed_concurrency,
+         extract_concurrency=extract_concurrency,
+         extract_backend=extract_backend,
          base_url=base_url,
          api_key=api_key,
          local_cuda=local_cuda,
          auto_index=auto_index,
          use_config=use_config,
+         runtime_config=_RUNTIME_CONFIG,
+         config_override=config,
      )

      request = SearchRequest(
@@ -108,6 +147,8 @@
          model_name=settings.model_name,
          batch_size=settings.batch_size,
          embed_concurrency=settings.embed_concurrency,
+         extract_concurrency=settings.extract_concurrency,
+         extract_backend=settings.extract_backend,
          provider=settings.provider,
          base_url=settings.base_url,
          api_key=settings.api_key,
@@ -115,6 +156,8 @@
          exclude_patterns=normalized_excludes,
          extensions=normalized_exts,
          auto_index=settings.auto_index,
+         temporary_index=temporary_index,
+         no_cache=no_cache,
          rerank=settings.rerank,
          flashrank_model=settings.flashrank_model,
          remote_rerank=settings.remote_rerank,
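A hedged sketch of a search call exercising the per-call options threaded through the hunks above (`no_cache`, `temporary_index`, and the extraction settings); the query string is illustrative, and the result fields follow the README snippet earlier in this diff.

```python
import vexor

# In-memory search that neither reads nor writes the on-disk index cache,
# with extraction tuned for this call only.
response = vexor.search(
    "api client config",
    no_cache=True,             # skip the index cache entirely
    extract_concurrency=4,     # parallel file extraction workers
    extract_backend="thread",  # auto | thread | process, per the config docs above
)
for hit in response.results:
    print(hit.path, hit.score)
```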
@@ -135,10 +178,13 @@ def index(
      model: str | None = None,
      batch_size: int | None = None,
      embed_concurrency: int | None = None,
+     extract_concurrency: int | None = None,
+     extract_backend: str | None = None,
      base_url: str | None = None,
      api_key: str | None = None,
      local_cuda: bool | None = None,
      use_config: bool = True,
+     config: Config | Mapping[str, object] | str | None = None,
  ) -> IndexResult:
      """Build or refresh the index for the given directory."""

@@ -154,11 +200,15 @@
          model=model,
          batch_size=batch_size,
          embed_concurrency=embed_concurrency,
+         extract_concurrency=extract_concurrency,
+         extract_backend=extract_backend,
          base_url=base_url,
          api_key=api_key,
          local_cuda=local_cuda,
          auto_index=None,
          use_config=use_config,
+         runtime_config=_RUNTIME_CONFIG,
+         config_override=config,
      )

      return build_index(
@@ -170,6 +220,8 @@
          model_name=settings.model_name,
          batch_size=settings.batch_size,
          embed_concurrency=settings.embed_concurrency,
+         extract_concurrency=settings.extract_concurrency,
+         extract_backend=settings.extract_backend,
          provider=settings.provider,
          base_url=settings.base_url,
          api_key=settings.api_key,
@@ -220,6 +272,8 @@ def _validate_mode(mode: str) -> str:
      return mode


+
+
  def _normalize_extensions(values: Sequence[str] | str | None) -> tuple[str, ...]:
      return normalize_extensions(_coerce_iterable(values))

@@ -242,13 +296,23 @@ def _resolve_settings(
      model: str | None,
      batch_size: int | None,
      embed_concurrency: int | None,
+     extract_concurrency: int | None,
+     extract_backend: str | None,
      base_url: str | None,
      api_key: str | None,
      local_cuda: bool | None,
      auto_index: bool | None,
      use_config: bool,
+     runtime_config: Config | None = None,
+     config_override: Config | Mapping[str, object] | str | None = None,
  ) -> RuntimeSettings:
-     config = load_config() if use_config else Config()
+     config = (
+         runtime_config if (use_config and runtime_config is not None) else None
+     )
+     if config is None:
+         config = load_config() if use_config else Config()
+     if config_override is not None:
+         config = _apply_config_override(config, config_override)
      provider_value = (provider or config.provider or DEFAULT_PROVIDER).lower()
      rerank_value = (config.rerank or DEFAULT_RERANK).strip().lower()
      if rerank_value not in SUPPORTED_RERANKERS:
@@ -265,11 +329,21 @@
      embed_value = (
          embed_concurrency if embed_concurrency is not None else config.embed_concurrency
      )
+     extract_value = (
+         extract_concurrency
+         if extract_concurrency is not None
+         else config.extract_concurrency
+     )
+     extract_backend_value = (
+         extract_backend if extract_backend is not None else config.extract_backend
+     )
      return RuntimeSettings(
          provider=provider_value,
          model_name=model_name,
          batch_size=batch_value,
          embed_concurrency=embed_value,
+         extract_concurrency=extract_value,
+         extract_backend=extract_backend_value,
          base_url=base_url if base_url is not None else config.base_url,
          api_key=api_key if api_key is not None else config.api_key,
          local_cuda=bool(local_cuda if local_cuda is not None else config.local_cuda),
@@ -278,3 +352,15 @@
          flashrank_model=config.flashrank_model,
          remote_rerank=config.remote_rerank,
      )
+
+
+ def _apply_config_override(
+     base: Config,
+     override: Config | Mapping[str, object] | str,
+ ) -> Config:
+     if isinstance(override, Config):
+         return override
+     try:
+         return config_from_json(override, base=base)
+     except ValueError as exc:
+         raise VexorError(str(exc)) from exc
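Read together, these hunks give the API a settings precedence: an explicit keyword argument wins over the per-call `config=` override, which wins over the module-level config set via `set_config_json`, which in turn wins over the on-disk config (or plain `Config()` defaults when `use_config=False`). A hedged sketch with illustrative values; the mapping keys mirror `Config` attribute names seen in this diff, but the exact names `config_from_json` accepts are an assumption.

```python
import vexor

vexor.set_config_json({"embed_concurrency": 2})  # module-level runtime config

response = vexor.search(
    "api client config",
    config={"embed_concurrency": 4},  # per-call override beats the runtime config
    embed_concurrency=8,              # explicit keyword argument wins over both
)
```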