vexor 0.21.0__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vexor/__init__.py +17 -2
- vexor/api.py +796 -86
- vexor/cache.py +158 -23
- vexor/config.py +47 -6
- vexor/services/content_extract_service.py +6 -0
- vexor/services/search_service.py +42 -24
- {vexor-0.21.0.dist-info → vexor-0.22.0.dist-info}/METADATA +14 -1
- {vexor-0.21.0.dist-info → vexor-0.22.0.dist-info}/RECORD +11 -11
- {vexor-0.21.0.dist-info → vexor-0.22.0.dist-info}/WHEEL +0 -0
- {vexor-0.21.0.dist-info → vexor-0.22.0.dist-info}/entry_points.txt +0 -0
- {vexor-0.21.0.dist-info → vexor-0.22.0.dist-info}/licenses/LICENSE +0 -0
vexor/api.py
CHANGED
|
@@ -3,26 +3,42 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
+
from contextlib import ExitStack, contextmanager
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from collections.abc import Mapping
|
|
8
9
|
from typing import Sequence
|
|
9
10
|
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
10
13
|
from .config import (
|
|
11
14
|
DEFAULT_BATCH_SIZE,
|
|
15
|
+
DEFAULT_EXTRACT_BACKEND,
|
|
16
|
+
DEFAULT_EXTRACT_CONCURRENCY,
|
|
12
17
|
DEFAULT_PROVIDER,
|
|
13
18
|
DEFAULT_RERANK,
|
|
14
19
|
Config,
|
|
15
20
|
RemoteRerankConfig,
|
|
16
21
|
SUPPORTED_RERANKERS,
|
|
17
22
|
config_from_json,
|
|
23
|
+
config_dir_context,
|
|
18
24
|
load_config,
|
|
19
25
|
resolve_default_model,
|
|
20
26
|
set_config_dir,
|
|
21
27
|
)
|
|
22
|
-
from .cache import set_cache_dir
|
|
28
|
+
from .cache import cache_dir_context, set_cache_dir
|
|
23
29
|
from .modes import available_modes, get_strategy
|
|
24
|
-
from .services.index_service import
|
|
25
|
-
|
|
30
|
+
from .services.index_service import (
|
|
31
|
+
IndexResult,
|
|
32
|
+
build_index,
|
|
33
|
+
build_index_in_memory,
|
|
34
|
+
clear_index_entries,
|
|
35
|
+
)
|
|
36
|
+
from .services.search_service import (
|
|
37
|
+
SearchRequest,
|
|
38
|
+
SearchResponse,
|
|
39
|
+
perform_search,
|
|
40
|
+
search_from_vectors,
|
|
41
|
+
)
|
|
26
42
|
from .text import Messages
|
|
27
43
|
from .utils import (
|
|
28
44
|
ensure_positive,
|
|
@@ -53,9 +69,118 @@ class RuntimeSettings:
|
|
|
53
69
|
remote_rerank: RemoteRerankConfig | None
|
|
54
70
|
|
|
55
71
|
|
|
72
|
+
@dataclass(slots=True)
|
|
73
|
+
class InMemoryIndex:
|
|
74
|
+
base_path: Path
|
|
75
|
+
paths: Sequence[Path]
|
|
76
|
+
vectors: np.ndarray
|
|
77
|
+
metadata: dict[str, object]
|
|
78
|
+
model_name: str
|
|
79
|
+
batch_size: int
|
|
80
|
+
embed_concurrency: int
|
|
81
|
+
provider: str
|
|
82
|
+
base_url: str | None
|
|
83
|
+
api_key: str | None
|
|
84
|
+
local_cuda: bool
|
|
85
|
+
rerank: str = DEFAULT_RERANK
|
|
86
|
+
flashrank_model: str | None = None
|
|
87
|
+
remote_rerank: RemoteRerankConfig | None = None
|
|
88
|
+
|
|
89
|
+
def search(
|
|
90
|
+
self,
|
|
91
|
+
query: str,
|
|
92
|
+
*,
|
|
93
|
+
top: int = 5,
|
|
94
|
+
rerank: str | None = None,
|
|
95
|
+
flashrank_model: str | None = None,
|
|
96
|
+
remote_rerank: RemoteRerankConfig | None = None,
|
|
97
|
+
no_cache: bool = True,
|
|
98
|
+
) -> SearchResponse:
|
|
99
|
+
"""Search against the in-memory index without touching disk."""
|
|
100
|
+
|
|
101
|
+
clean_query = query.strip()
|
|
102
|
+
if not clean_query:
|
|
103
|
+
raise VexorError(Messages.ERROR_EMPTY_QUERY)
|
|
104
|
+
try:
|
|
105
|
+
ensure_positive(top, "top")
|
|
106
|
+
except ValueError as exc:
|
|
107
|
+
raise VexorError(str(exc)) from exc
|
|
108
|
+
|
|
109
|
+
effective_rerank = (rerank or self.rerank or DEFAULT_RERANK).strip().lower()
|
|
110
|
+
if effective_rerank not in SUPPORTED_RERANKERS:
|
|
111
|
+
effective_rerank = DEFAULT_RERANK
|
|
112
|
+
|
|
113
|
+
include_hidden = bool(self.metadata.get("include_hidden", False))
|
|
114
|
+
respect_gitignore = bool(self.metadata.get("respect_gitignore", True))
|
|
115
|
+
mode = str(self.metadata.get("mode", "auto"))
|
|
116
|
+
recursive = bool(self.metadata.get("recursive", True))
|
|
117
|
+
exclude_patterns = tuple(self.metadata.get("exclude_patterns") or ())
|
|
118
|
+
extensions = tuple(self.metadata.get("extensions") or ())
|
|
119
|
+
|
|
120
|
+
request = SearchRequest(
|
|
121
|
+
query=clean_query,
|
|
122
|
+
directory=self.base_path,
|
|
123
|
+
include_hidden=include_hidden,
|
|
124
|
+
respect_gitignore=respect_gitignore,
|
|
125
|
+
mode=mode,
|
|
126
|
+
recursive=recursive,
|
|
127
|
+
top_k=top,
|
|
128
|
+
model_name=self.model_name,
|
|
129
|
+
batch_size=self.batch_size,
|
|
130
|
+
embed_concurrency=self.embed_concurrency,
|
|
131
|
+
extract_concurrency=DEFAULT_EXTRACT_CONCURRENCY,
|
|
132
|
+
extract_backend=DEFAULT_EXTRACT_BACKEND,
|
|
133
|
+
provider=self.provider,
|
|
134
|
+
base_url=self.base_url,
|
|
135
|
+
api_key=self.api_key,
|
|
136
|
+
local_cuda=self.local_cuda,
|
|
137
|
+
exclude_patterns=exclude_patterns,
|
|
138
|
+
extensions=extensions,
|
|
139
|
+
auto_index=False,
|
|
140
|
+
temporary_index=True,
|
|
141
|
+
no_cache=no_cache,
|
|
142
|
+
rerank=effective_rerank,
|
|
143
|
+
flashrank_model=(
|
|
144
|
+
flashrank_model
|
|
145
|
+
if flashrank_model is not None
|
|
146
|
+
else self.flashrank_model
|
|
147
|
+
),
|
|
148
|
+
remote_rerank=(
|
|
149
|
+
remote_rerank if remote_rerank is not None else self.remote_rerank
|
|
150
|
+
),
|
|
151
|
+
)
|
|
152
|
+
return search_from_vectors(
|
|
153
|
+
request,
|
|
154
|
+
paths=self.paths,
|
|
155
|
+
file_vectors=self.vectors,
|
|
156
|
+
metadata=self.metadata,
|
|
157
|
+
is_stale=False,
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
|
|
56
161
|
_RUNTIME_CONFIG: Config | None = None
|
|
57
162
|
|
|
58
163
|
|
|
164
|
+
@contextmanager
|
|
165
|
+
def _data_dir_context(
|
|
166
|
+
data_dir: Path | str | None,
|
|
167
|
+
*,
|
|
168
|
+
config_dir: Path | str | None,
|
|
169
|
+
cache_dir: Path | str | None,
|
|
170
|
+
):
|
|
171
|
+
if data_dir is None and config_dir is None and cache_dir is None:
|
|
172
|
+
yield
|
|
173
|
+
return
|
|
174
|
+
effective_config_dir = config_dir if config_dir is not None else data_dir
|
|
175
|
+
effective_cache_dir = cache_dir if cache_dir is not None else data_dir
|
|
176
|
+
with ExitStack() as stack:
|
|
177
|
+
if effective_config_dir is not None:
|
|
178
|
+
stack.enter_context(config_dir_context(effective_config_dir))
|
|
179
|
+
if effective_cache_dir is not None:
|
|
180
|
+
stack.enter_context(cache_dir_context(effective_cache_dir))
|
|
181
|
+
yield
|
|
182
|
+
|
|
183
|
+
|
|
59
184
|
def set_data_dir(path: Path | str | None) -> None:
|
|
60
185
|
"""Set the base directory for config and cache data."""
|
|
61
186
|
set_config_dir(path)
|
|
@@ -77,6 +202,302 @@ def set_config_json(
|
|
|
77
202
|
raise VexorError(str(exc)) from exc
|
|
78
203
|
|
|
79
204
|
|
|
205
|
+
class VexorClient:
|
|
206
|
+
"""Session-style API wrapper for library use."""
|
|
207
|
+
|
|
208
|
+
def __init__(
|
|
209
|
+
self,
|
|
210
|
+
*,
|
|
211
|
+
data_dir: Path | str | None = None,
|
|
212
|
+
config_dir: Path | str | None = None,
|
|
213
|
+
cache_dir: Path | str | None = None,
|
|
214
|
+
use_config: bool = True,
|
|
215
|
+
) -> None:
|
|
216
|
+
self.data_dir = data_dir
|
|
217
|
+
self.config_dir = config_dir
|
|
218
|
+
self.cache_dir = cache_dir
|
|
219
|
+
self.use_config = use_config
|
|
220
|
+
self._runtime_config: Config | None = None
|
|
221
|
+
|
|
222
|
+
def set_config_json(
|
|
223
|
+
self,
|
|
224
|
+
payload: Mapping[str, object] | str | None,
|
|
225
|
+
*,
|
|
226
|
+
replace: bool = False,
|
|
227
|
+
) -> None:
|
|
228
|
+
"""Set in-memory config for this client from a JSON string or mapping."""
|
|
229
|
+
if payload is None:
|
|
230
|
+
self._runtime_config = None
|
|
231
|
+
return
|
|
232
|
+
base = None if replace else (self._runtime_config or load_config())
|
|
233
|
+
try:
|
|
234
|
+
self._runtime_config = config_from_json(payload, base=base)
|
|
235
|
+
except ValueError as exc:
|
|
236
|
+
raise VexorError(str(exc)) from exc
|
|
237
|
+
|
|
238
|
+
@contextmanager
|
|
239
|
+
def config_context(
|
|
240
|
+
self,
|
|
241
|
+
payload: Mapping[str, object] | str | None,
|
|
242
|
+
*,
|
|
243
|
+
replace: bool = False,
|
|
244
|
+
):
|
|
245
|
+
"""Temporarily override this client's in-memory config."""
|
|
246
|
+
previous = self._runtime_config
|
|
247
|
+
self.set_config_json(payload, replace=replace)
|
|
248
|
+
try:
|
|
249
|
+
yield self
|
|
250
|
+
finally:
|
|
251
|
+
self._runtime_config = previous
|
|
252
|
+
|
|
253
|
+
def _resolve_dir_overrides(
|
|
254
|
+
self,
|
|
255
|
+
data_dir: Path | str | None,
|
|
256
|
+
config_dir: Path | str | None,
|
|
257
|
+
cache_dir: Path | str | None,
|
|
258
|
+
) -> tuple[Path | str | None, Path | str | None, Path | str | None]:
|
|
259
|
+
resolved_data_dir = data_dir if data_dir is not None else self.data_dir
|
|
260
|
+
resolved_config_dir = config_dir if config_dir is not None else self.config_dir
|
|
261
|
+
resolved_cache_dir = cache_dir if cache_dir is not None else self.cache_dir
|
|
262
|
+
return resolved_data_dir, resolved_config_dir, resolved_cache_dir
|
|
263
|
+
|
|
264
|
+
def search(
|
|
265
|
+
self,
|
|
266
|
+
query: str,
|
|
267
|
+
*,
|
|
268
|
+
path: Path | str = Path.cwd(),
|
|
269
|
+
top: int = 5,
|
|
270
|
+
include_hidden: bool = False,
|
|
271
|
+
respect_gitignore: bool = True,
|
|
272
|
+
mode: str = "auto",
|
|
273
|
+
recursive: bool = True,
|
|
274
|
+
extensions: Sequence[str] | str | None = None,
|
|
275
|
+
exclude_patterns: Sequence[str] | str | None = None,
|
|
276
|
+
provider: str | None = None,
|
|
277
|
+
model: str | None = None,
|
|
278
|
+
batch_size: int | None = None,
|
|
279
|
+
embed_concurrency: int | None = None,
|
|
280
|
+
extract_concurrency: int | None = None,
|
|
281
|
+
extract_backend: str | None = None,
|
|
282
|
+
base_url: str | None = None,
|
|
283
|
+
api_key: str | None = None,
|
|
284
|
+
local_cuda: bool | None = None,
|
|
285
|
+
auto_index: bool | None = None,
|
|
286
|
+
use_config: bool | None = None,
|
|
287
|
+
config: Config | Mapping[str, object] | str | None = None,
|
|
288
|
+
temporary_index: bool = False,
|
|
289
|
+
no_cache: bool = False,
|
|
290
|
+
data_dir: Path | str | None = None,
|
|
291
|
+
config_dir: Path | str | None = None,
|
|
292
|
+
cache_dir: Path | str | None = None,
|
|
293
|
+
) -> SearchResponse:
|
|
294
|
+
"""Run a semantic search and return ranked results."""
|
|
295
|
+
|
|
296
|
+
resolved_use_config = self.use_config if use_config is None else use_config
|
|
297
|
+
resolved_data_dir, resolved_config_dir, resolved_cache_dir = (
|
|
298
|
+
self._resolve_dir_overrides(data_dir, config_dir, cache_dir)
|
|
299
|
+
)
|
|
300
|
+
return _search_with_settings(
|
|
301
|
+
query,
|
|
302
|
+
path=path,
|
|
303
|
+
top=top,
|
|
304
|
+
include_hidden=include_hidden,
|
|
305
|
+
respect_gitignore=respect_gitignore,
|
|
306
|
+
mode=mode,
|
|
307
|
+
recursive=recursive,
|
|
308
|
+
extensions=extensions,
|
|
309
|
+
exclude_patterns=exclude_patterns,
|
|
310
|
+
provider=provider,
|
|
311
|
+
model=model,
|
|
312
|
+
batch_size=batch_size,
|
|
313
|
+
embed_concurrency=embed_concurrency,
|
|
314
|
+
extract_concurrency=extract_concurrency,
|
|
315
|
+
extract_backend=extract_backend,
|
|
316
|
+
base_url=base_url,
|
|
317
|
+
api_key=api_key,
|
|
318
|
+
local_cuda=local_cuda,
|
|
319
|
+
auto_index=auto_index,
|
|
320
|
+
use_config=resolved_use_config,
|
|
321
|
+
config=config,
|
|
322
|
+
temporary_index=temporary_index,
|
|
323
|
+
no_cache=no_cache,
|
|
324
|
+
runtime_config=self._runtime_config,
|
|
325
|
+
data_dir=resolved_data_dir,
|
|
326
|
+
config_dir=resolved_config_dir,
|
|
327
|
+
cache_dir=resolved_cache_dir,
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
def index(
|
|
331
|
+
self,
|
|
332
|
+
path: Path | str = Path.cwd(),
|
|
333
|
+
*,
|
|
334
|
+
include_hidden: bool = False,
|
|
335
|
+
respect_gitignore: bool = True,
|
|
336
|
+
mode: str = "auto",
|
|
337
|
+
recursive: bool = True,
|
|
338
|
+
extensions: Sequence[str] | str | None = None,
|
|
339
|
+
exclude_patterns: Sequence[str] | str | None = None,
|
|
340
|
+
provider: str | None = None,
|
|
341
|
+
model: str | None = None,
|
|
342
|
+
batch_size: int | None = None,
|
|
343
|
+
embed_concurrency: int | None = None,
|
|
344
|
+
extract_concurrency: int | None = None,
|
|
345
|
+
extract_backend: str | None = None,
|
|
346
|
+
base_url: str | None = None,
|
|
347
|
+
api_key: str | None = None,
|
|
348
|
+
local_cuda: bool | None = None,
|
|
349
|
+
use_config: bool | None = None,
|
|
350
|
+
config: Config | Mapping[str, object] | str | None = None,
|
|
351
|
+
data_dir: Path | str | None = None,
|
|
352
|
+
config_dir: Path | str | None = None,
|
|
353
|
+
cache_dir: Path | str | None = None,
|
|
354
|
+
) -> IndexResult:
|
|
355
|
+
"""Build or refresh the index for the given directory."""
|
|
356
|
+
|
|
357
|
+
resolved_use_config = self.use_config if use_config is None else use_config
|
|
358
|
+
resolved_data_dir, resolved_config_dir, resolved_cache_dir = (
|
|
359
|
+
self._resolve_dir_overrides(data_dir, config_dir, cache_dir)
|
|
360
|
+
)
|
|
361
|
+
return _index_with_settings(
|
|
362
|
+
path=path,
|
|
363
|
+
include_hidden=include_hidden,
|
|
364
|
+
respect_gitignore=respect_gitignore,
|
|
365
|
+
mode=mode,
|
|
366
|
+
recursive=recursive,
|
|
367
|
+
extensions=extensions,
|
|
368
|
+
exclude_patterns=exclude_patterns,
|
|
369
|
+
provider=provider,
|
|
370
|
+
model=model,
|
|
371
|
+
batch_size=batch_size,
|
|
372
|
+
embed_concurrency=embed_concurrency,
|
|
373
|
+
extract_concurrency=extract_concurrency,
|
|
374
|
+
extract_backend=extract_backend,
|
|
375
|
+
base_url=base_url,
|
|
376
|
+
api_key=api_key,
|
|
377
|
+
local_cuda=local_cuda,
|
|
378
|
+
use_config=resolved_use_config,
|
|
379
|
+
config=config,
|
|
380
|
+
runtime_config=self._runtime_config,
|
|
381
|
+
data_dir=resolved_data_dir,
|
|
382
|
+
config_dir=resolved_config_dir,
|
|
383
|
+
cache_dir=resolved_cache_dir,
|
|
384
|
+
)
|
|
385
|
+
|
|
386
|
+
def index_in_memory(
|
|
387
|
+
self,
|
|
388
|
+
path: Path | str = Path.cwd(),
|
|
389
|
+
*,
|
|
390
|
+
include_hidden: bool = False,
|
|
391
|
+
respect_gitignore: bool = True,
|
|
392
|
+
mode: str = "auto",
|
|
393
|
+
recursive: bool = True,
|
|
394
|
+
extensions: Sequence[str] | str | None = None,
|
|
395
|
+
exclude_patterns: Sequence[str] | str | None = None,
|
|
396
|
+
provider: str | None = None,
|
|
397
|
+
model: str | None = None,
|
|
398
|
+
batch_size: int | None = None,
|
|
399
|
+
embed_concurrency: int | None = None,
|
|
400
|
+
extract_concurrency: int | None = None,
|
|
401
|
+
extract_backend: str | None = None,
|
|
402
|
+
base_url: str | None = None,
|
|
403
|
+
api_key: str | None = None,
|
|
404
|
+
local_cuda: bool | None = None,
|
|
405
|
+
use_config: bool | None = None,
|
|
406
|
+
config: Config | Mapping[str, object] | str | None = None,
|
|
407
|
+
no_cache: bool = True,
|
|
408
|
+
data_dir: Path | str | None = None,
|
|
409
|
+
config_dir: Path | str | None = None,
|
|
410
|
+
cache_dir: Path | str | None = None,
|
|
411
|
+
) -> InMemoryIndex:
|
|
412
|
+
"""Build an index in memory without writing to disk."""
|
|
413
|
+
|
|
414
|
+
resolved_use_config = self.use_config if use_config is None else use_config
|
|
415
|
+
resolved_data_dir, resolved_config_dir, resolved_cache_dir = (
|
|
416
|
+
self._resolve_dir_overrides(data_dir, config_dir, cache_dir)
|
|
417
|
+
)
|
|
418
|
+
return _index_in_memory_with_settings(
|
|
419
|
+
path=path,
|
|
420
|
+
include_hidden=include_hidden,
|
|
421
|
+
respect_gitignore=respect_gitignore,
|
|
422
|
+
mode=mode,
|
|
423
|
+
recursive=recursive,
|
|
424
|
+
extensions=extensions,
|
|
425
|
+
exclude_patterns=exclude_patterns,
|
|
426
|
+
provider=provider,
|
|
427
|
+
model=model,
|
|
428
|
+
batch_size=batch_size,
|
|
429
|
+
embed_concurrency=embed_concurrency,
|
|
430
|
+
extract_concurrency=extract_concurrency,
|
|
431
|
+
extract_backend=extract_backend,
|
|
432
|
+
base_url=base_url,
|
|
433
|
+
api_key=api_key,
|
|
434
|
+
local_cuda=local_cuda,
|
|
435
|
+
use_config=resolved_use_config,
|
|
436
|
+
config=config,
|
|
437
|
+
no_cache=no_cache,
|
|
438
|
+
runtime_config=self._runtime_config,
|
|
439
|
+
data_dir=resolved_data_dir,
|
|
440
|
+
config_dir=resolved_config_dir,
|
|
441
|
+
cache_dir=resolved_cache_dir,
|
|
442
|
+
)
|
|
443
|
+
|
|
444
|
+
def clear_index(
|
|
445
|
+
self,
|
|
446
|
+
path: Path | str = Path.cwd(),
|
|
447
|
+
*,
|
|
448
|
+
include_hidden: bool = False,
|
|
449
|
+
respect_gitignore: bool = True,
|
|
450
|
+
mode: str = "auto",
|
|
451
|
+
recursive: bool = True,
|
|
452
|
+
extensions: Sequence[str] | str | None = None,
|
|
453
|
+
exclude_patterns: Sequence[str] | str | None = None,
|
|
454
|
+
data_dir: Path | str | None = None,
|
|
455
|
+
config_dir: Path | str | None = None,
|
|
456
|
+
cache_dir: Path | str | None = None,
|
|
457
|
+
) -> int:
|
|
458
|
+
"""Clear cached index entries for the given directory."""
|
|
459
|
+
|
|
460
|
+
resolved_data_dir, resolved_config_dir, resolved_cache_dir = (
|
|
461
|
+
self._resolve_dir_overrides(data_dir, config_dir, cache_dir)
|
|
462
|
+
)
|
|
463
|
+
return _clear_index_with_settings(
|
|
464
|
+
path=path,
|
|
465
|
+
include_hidden=include_hidden,
|
|
466
|
+
respect_gitignore=respect_gitignore,
|
|
467
|
+
mode=mode,
|
|
468
|
+
recursive=recursive,
|
|
469
|
+
extensions=extensions,
|
|
470
|
+
exclude_patterns=exclude_patterns,
|
|
471
|
+
data_dir=resolved_data_dir,
|
|
472
|
+
config_dir=resolved_config_dir,
|
|
473
|
+
cache_dir=resolved_cache_dir,
|
|
474
|
+
)
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
@contextmanager
|
|
478
|
+
def config_context(
|
|
479
|
+
payload: Mapping[str, object] | str | None,
|
|
480
|
+
*,
|
|
481
|
+
replace: bool = False,
|
|
482
|
+
data_dir: Path | str | None = None,
|
|
483
|
+
config_dir: Path | str | None = None,
|
|
484
|
+
cache_dir: Path | str | None = None,
|
|
485
|
+
use_config: bool = True,
|
|
486
|
+
):
|
|
487
|
+
"""Yield a configured client for scoped API usage."""
|
|
488
|
+
client = VexorClient(
|
|
489
|
+
data_dir=data_dir,
|
|
490
|
+
config_dir=config_dir,
|
|
491
|
+
cache_dir=cache_dir,
|
|
492
|
+
use_config=use_config,
|
|
493
|
+
)
|
|
494
|
+
client.set_config_json(payload, replace=replace)
|
|
495
|
+
try:
|
|
496
|
+
yield client
|
|
497
|
+
finally:
|
|
498
|
+
client.set_config_json(None)
|
|
499
|
+
|
|
500
|
+
|
|
80
501
|
def search(
|
|
81
502
|
query: str,
|
|
82
503
|
*,
|
|
@@ -102,25 +523,21 @@ def search(
|
|
|
102
523
|
config: Config | Mapping[str, object] | str | None = None,
|
|
103
524
|
temporary_index: bool = False,
|
|
104
525
|
no_cache: bool = False,
|
|
526
|
+
data_dir: Path | str | None = None,
|
|
527
|
+
config_dir: Path | str | None = None,
|
|
528
|
+
cache_dir: Path | str | None = None,
|
|
105
529
|
) -> SearchResponse:
|
|
106
530
|
"""Run a semantic search and return ranked results."""
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
mode_value = _validate_mode(mode)
|
|
118
|
-
normalized_exts = _normalize_extensions(extensions)
|
|
119
|
-
normalized_excludes = _normalize_excludes(exclude_patterns)
|
|
120
|
-
if extensions and not normalized_exts:
|
|
121
|
-
raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)
|
|
122
|
-
|
|
123
|
-
settings = _resolve_settings(
|
|
531
|
+
return _search_with_settings(
|
|
532
|
+
query,
|
|
533
|
+
path=path,
|
|
534
|
+
top=top,
|
|
535
|
+
include_hidden=include_hidden,
|
|
536
|
+
respect_gitignore=respect_gitignore,
|
|
537
|
+
mode=mode,
|
|
538
|
+
recursive=recursive,
|
|
539
|
+
extensions=extensions,
|
|
540
|
+
exclude_patterns=exclude_patterns,
|
|
124
541
|
provider=provider,
|
|
125
542
|
model=model,
|
|
126
543
|
batch_size=batch_size,
|
|
@@ -132,37 +549,14 @@ def search(
|
|
|
132
549
|
local_cuda=local_cuda,
|
|
133
550
|
auto_index=auto_index,
|
|
134
551
|
use_config=use_config,
|
|
135
|
-
|
|
136
|
-
config_override=config,
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
request = SearchRequest(
|
|
140
|
-
query=clean_query,
|
|
141
|
-
directory=directory,
|
|
142
|
-
include_hidden=include_hidden,
|
|
143
|
-
respect_gitignore=respect_gitignore,
|
|
144
|
-
mode=mode_value,
|
|
145
|
-
recursive=recursive,
|
|
146
|
-
top_k=top,
|
|
147
|
-
model_name=settings.model_name,
|
|
148
|
-
batch_size=settings.batch_size,
|
|
149
|
-
embed_concurrency=settings.embed_concurrency,
|
|
150
|
-
extract_concurrency=settings.extract_concurrency,
|
|
151
|
-
extract_backend=settings.extract_backend,
|
|
152
|
-
provider=settings.provider,
|
|
153
|
-
base_url=settings.base_url,
|
|
154
|
-
api_key=settings.api_key,
|
|
155
|
-
local_cuda=settings.local_cuda,
|
|
156
|
-
exclude_patterns=normalized_excludes,
|
|
157
|
-
extensions=normalized_exts,
|
|
158
|
-
auto_index=settings.auto_index,
|
|
552
|
+
config=config,
|
|
159
553
|
temporary_index=temporary_index,
|
|
160
554
|
no_cache=no_cache,
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
555
|
+
runtime_config=_RUNTIME_CONFIG,
|
|
556
|
+
data_dir=data_dir,
|
|
557
|
+
config_dir=config_dir,
|
|
558
|
+
cache_dir=cache_dir,
|
|
164
559
|
)
|
|
165
|
-
return perform_search(request)
|
|
166
560
|
|
|
167
561
|
|
|
168
562
|
def index(
|
|
@@ -185,17 +579,19 @@ def index(
|
|
|
185
579
|
local_cuda: bool | None = None,
|
|
186
580
|
use_config: bool = True,
|
|
187
581
|
config: Config | Mapping[str, object] | str | None = None,
|
|
582
|
+
data_dir: Path | str | None = None,
|
|
583
|
+
config_dir: Path | str | None = None,
|
|
584
|
+
cache_dir: Path | str | None = None,
|
|
188
585
|
) -> IndexResult:
|
|
189
586
|
"""Build or refresh the index for the given directory."""
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
settings = _resolve_settings(
|
|
587
|
+
return _index_with_settings(
|
|
588
|
+
path=path,
|
|
589
|
+
include_hidden=include_hidden,
|
|
590
|
+
respect_gitignore=respect_gitignore,
|
|
591
|
+
mode=mode,
|
|
592
|
+
recursive=recursive,
|
|
593
|
+
extensions=extensions,
|
|
594
|
+
exclude_patterns=exclude_patterns,
|
|
199
595
|
provider=provider,
|
|
200
596
|
model=model,
|
|
201
597
|
batch_size=batch_size,
|
|
@@ -205,29 +601,65 @@ def index(
|
|
|
205
601
|
base_url=base_url,
|
|
206
602
|
api_key=api_key,
|
|
207
603
|
local_cuda=local_cuda,
|
|
208
|
-
auto_index=None,
|
|
209
604
|
use_config=use_config,
|
|
605
|
+
config=config,
|
|
210
606
|
runtime_config=_RUNTIME_CONFIG,
|
|
211
|
-
|
|
607
|
+
data_dir=data_dir,
|
|
608
|
+
config_dir=config_dir,
|
|
609
|
+
cache_dir=cache_dir,
|
|
212
610
|
)
|
|
213
611
|
|
|
214
|
-
|
|
215
|
-
|
|
612
|
+
|
|
613
|
+
def index_in_memory(
|
|
614
|
+
path: Path | str = Path.cwd(),
|
|
615
|
+
*,
|
|
616
|
+
include_hidden: bool = False,
|
|
617
|
+
respect_gitignore: bool = True,
|
|
618
|
+
mode: str = "auto",
|
|
619
|
+
recursive: bool = True,
|
|
620
|
+
extensions: Sequence[str] | str | None = None,
|
|
621
|
+
exclude_patterns: Sequence[str] | str | None = None,
|
|
622
|
+
provider: str | None = None,
|
|
623
|
+
model: str | None = None,
|
|
624
|
+
batch_size: int | None = None,
|
|
625
|
+
embed_concurrency: int | None = None,
|
|
626
|
+
extract_concurrency: int | None = None,
|
|
627
|
+
extract_backend: str | None = None,
|
|
628
|
+
base_url: str | None = None,
|
|
629
|
+
api_key: str | None = None,
|
|
630
|
+
local_cuda: bool | None = None,
|
|
631
|
+
use_config: bool = True,
|
|
632
|
+
config: Config | Mapping[str, object] | str | None = None,
|
|
633
|
+
no_cache: bool = True,
|
|
634
|
+
data_dir: Path | str | None = None,
|
|
635
|
+
config_dir: Path | str | None = None,
|
|
636
|
+
cache_dir: Path | str | None = None,
|
|
637
|
+
) -> InMemoryIndex:
|
|
638
|
+
"""Build an index in memory without writing to disk."""
|
|
639
|
+
return _index_in_memory_with_settings(
|
|
640
|
+
path=path,
|
|
216
641
|
include_hidden=include_hidden,
|
|
217
642
|
respect_gitignore=respect_gitignore,
|
|
218
|
-
mode=
|
|
643
|
+
mode=mode,
|
|
219
644
|
recursive=recursive,
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
645
|
+
extensions=extensions,
|
|
646
|
+
exclude_patterns=exclude_patterns,
|
|
647
|
+
provider=provider,
|
|
648
|
+
model=model,
|
|
649
|
+
batch_size=batch_size,
|
|
650
|
+
embed_concurrency=embed_concurrency,
|
|
651
|
+
extract_concurrency=extract_concurrency,
|
|
652
|
+
extract_backend=extract_backend,
|
|
653
|
+
base_url=base_url,
|
|
654
|
+
api_key=api_key,
|
|
655
|
+
local_cuda=local_cuda,
|
|
656
|
+
use_config=use_config,
|
|
657
|
+
config=config,
|
|
658
|
+
no_cache=no_cache,
|
|
659
|
+
runtime_config=_RUNTIME_CONFIG,
|
|
660
|
+
data_dir=data_dir,
|
|
661
|
+
config_dir=config_dir,
|
|
662
|
+
cache_dir=cache_dir,
|
|
231
663
|
)
|
|
232
664
|
|
|
233
665
|
|
|
@@ -240,27 +672,305 @@ def clear_index(
|
|
|
240
672
|
recursive: bool = True,
|
|
241
673
|
extensions: Sequence[str] | str | None = None,
|
|
242
674
|
exclude_patterns: Sequence[str] | str | None = None,
|
|
675
|
+
data_dir: Path | str | None = None,
|
|
676
|
+
config_dir: Path | str | None = None,
|
|
677
|
+
cache_dir: Path | str | None = None,
|
|
243
678
|
) -> int:
|
|
244
679
|
"""Clear cached index entries for the given directory."""
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
mode_value = _validate_mode(mode)
|
|
248
|
-
normalized_exts = _normalize_extensions(extensions)
|
|
249
|
-
normalized_excludes = _normalize_excludes(exclude_patterns)
|
|
250
|
-
if extensions and not normalized_exts:
|
|
251
|
-
raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)
|
|
252
|
-
|
|
253
|
-
return clear_index_entries(
|
|
254
|
-
directory,
|
|
680
|
+
return _clear_index_with_settings(
|
|
681
|
+
path=path,
|
|
255
682
|
include_hidden=include_hidden,
|
|
256
683
|
respect_gitignore=respect_gitignore,
|
|
257
|
-
mode=
|
|
684
|
+
mode=mode,
|
|
258
685
|
recursive=recursive,
|
|
259
|
-
|
|
260
|
-
|
|
686
|
+
extensions=extensions,
|
|
687
|
+
exclude_patterns=exclude_patterns,
|
|
688
|
+
data_dir=data_dir,
|
|
689
|
+
config_dir=config_dir,
|
|
690
|
+
cache_dir=cache_dir,
|
|
261
691
|
)
|
|
262
692
|
|
|
263
693
|
|
|
694
|
+
def _search_with_settings(
|
|
695
|
+
query: str,
|
|
696
|
+
*,
|
|
697
|
+
path: Path | str,
|
|
698
|
+
top: int,
|
|
699
|
+
include_hidden: bool,
|
|
700
|
+
respect_gitignore: bool,
|
|
701
|
+
mode: str,
|
|
702
|
+
recursive: bool,
|
|
703
|
+
extensions: Sequence[str] | str | None,
|
|
704
|
+
exclude_patterns: Sequence[str] | str | None,
|
|
705
|
+
provider: str | None,
|
|
706
|
+
model: str | None,
|
|
707
|
+
batch_size: int | None,
|
|
708
|
+
embed_concurrency: int | None,
|
|
709
|
+
extract_concurrency: int | None,
|
|
710
|
+
extract_backend: str | None,
|
|
711
|
+
base_url: str | None,
|
|
712
|
+
api_key: str | None,
|
|
713
|
+
local_cuda: bool | None,
|
|
714
|
+
auto_index: bool | None,
|
|
715
|
+
use_config: bool,
|
|
716
|
+
config: Config | Mapping[str, object] | str | None,
|
|
717
|
+
temporary_index: bool,
|
|
718
|
+
no_cache: bool,
|
|
719
|
+
runtime_config: Config | None,
|
|
720
|
+
data_dir: Path | str | None,
|
|
721
|
+
config_dir: Path | str | None,
|
|
722
|
+
cache_dir: Path | str | None,
|
|
723
|
+
) -> SearchResponse:
|
|
724
|
+
with _data_dir_context(data_dir, config_dir=config_dir, cache_dir=cache_dir):
|
|
725
|
+
clean_query = query.strip()
|
|
726
|
+
if not clean_query:
|
|
727
|
+
raise VexorError(Messages.ERROR_EMPTY_QUERY)
|
|
728
|
+
try:
|
|
729
|
+
ensure_positive(top, "top")
|
|
730
|
+
except ValueError as exc:
|
|
731
|
+
raise VexorError(str(exc)) from exc
|
|
732
|
+
|
|
733
|
+
directory = resolve_directory(path)
|
|
734
|
+
mode_value = _validate_mode(mode)
|
|
735
|
+
normalized_exts = _normalize_extensions(extensions)
|
|
736
|
+
normalized_excludes = _normalize_excludes(exclude_patterns)
|
|
737
|
+
if extensions and not normalized_exts:
|
|
738
|
+
raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)
|
|
739
|
+
|
|
740
|
+
settings = _resolve_settings(
|
|
741
|
+
provider=provider,
|
|
742
|
+
model=model,
|
|
743
|
+
batch_size=batch_size,
|
|
744
|
+
embed_concurrency=embed_concurrency,
|
|
745
|
+
extract_concurrency=extract_concurrency,
|
|
746
|
+
extract_backend=extract_backend,
|
|
747
|
+
base_url=base_url,
|
|
748
|
+
api_key=api_key,
|
|
749
|
+
local_cuda=local_cuda,
|
|
750
|
+
auto_index=auto_index,
|
|
751
|
+
use_config=use_config,
|
|
752
|
+
runtime_config=runtime_config,
|
|
753
|
+
config_override=config,
|
|
754
|
+
)
|
|
755
|
+
|
|
756
|
+
request = SearchRequest(
|
|
757
|
+
query=clean_query,
|
|
758
|
+
directory=directory,
|
|
759
|
+
include_hidden=include_hidden,
|
|
760
|
+
respect_gitignore=respect_gitignore,
|
|
761
|
+
mode=mode_value,
|
|
762
|
+
recursive=recursive,
|
|
763
|
+
top_k=top,
|
|
764
|
+
model_name=settings.model_name,
|
|
765
|
+
batch_size=settings.batch_size,
|
|
766
|
+
embed_concurrency=settings.embed_concurrency,
|
|
767
|
+
extract_concurrency=settings.extract_concurrency,
|
|
768
|
+
extract_backend=settings.extract_backend,
|
|
769
|
+
provider=settings.provider,
|
|
770
|
+
base_url=settings.base_url,
|
|
771
|
+
api_key=settings.api_key,
|
|
772
|
+
local_cuda=settings.local_cuda,
|
|
773
|
+
exclude_patterns=normalized_excludes,
|
|
774
|
+
extensions=normalized_exts,
|
|
775
|
+
auto_index=settings.auto_index,
|
|
776
|
+
temporary_index=temporary_index,
|
|
777
|
+
no_cache=no_cache,
|
|
778
|
+
rerank=settings.rerank,
|
|
779
|
+
flashrank_model=settings.flashrank_model,
|
|
780
|
+
remote_rerank=settings.remote_rerank,
|
|
781
|
+
)
|
|
782
|
+
return perform_search(request)
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
def _index_with_settings(
|
|
786
|
+
*,
|
|
787
|
+
path: Path | str,
|
|
788
|
+
include_hidden: bool,
|
|
789
|
+
respect_gitignore: bool,
|
|
790
|
+
mode: str,
|
|
791
|
+
recursive: bool,
|
|
792
|
+
extensions: Sequence[str] | str | None,
|
|
793
|
+
exclude_patterns: Sequence[str] | str | None,
|
|
794
|
+
provider: str | None,
|
|
795
|
+
model: str | None,
|
|
796
|
+
batch_size: int | None,
|
|
797
|
+
embed_concurrency: int | None,
|
|
798
|
+
extract_concurrency: int | None,
|
|
799
|
+
extract_backend: str | None,
|
|
800
|
+
base_url: str | None,
|
|
801
|
+
api_key: str | None,
|
|
802
|
+
local_cuda: bool | None,
|
|
803
|
+
use_config: bool,
|
|
804
|
+
config: Config | Mapping[str, object] | str | None,
|
|
805
|
+
runtime_config: Config | None,
|
|
806
|
+
data_dir: Path | str | None,
|
|
807
|
+
config_dir: Path | str | None,
|
|
808
|
+
cache_dir: Path | str | None,
|
|
809
|
+
) -> IndexResult:
|
|
810
|
+
with _data_dir_context(data_dir, config_dir=config_dir, cache_dir=cache_dir):
|
|
811
|
+
directory = resolve_directory(path)
|
|
812
|
+
mode_value = _validate_mode(mode)
|
|
813
|
+
normalized_exts = _normalize_extensions(extensions)
|
|
814
|
+
normalized_excludes = _normalize_excludes(exclude_patterns)
|
|
815
|
+
if extensions and not normalized_exts:
|
|
816
|
+
raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)
|
|
817
|
+
|
|
818
|
+
settings = _resolve_settings(
|
|
819
|
+
provider=provider,
|
|
820
|
+
model=model,
|
|
821
|
+
batch_size=batch_size,
|
|
822
|
+
embed_concurrency=embed_concurrency,
|
|
823
|
+
extract_concurrency=extract_concurrency,
|
|
824
|
+
extract_backend=extract_backend,
|
|
825
|
+
base_url=base_url,
|
|
826
|
+
api_key=api_key,
|
|
827
|
+
local_cuda=local_cuda,
|
|
828
|
+
auto_index=None,
|
|
829
|
+
use_config=use_config,
|
|
830
|
+
runtime_config=runtime_config,
|
|
831
|
+
config_override=config,
|
|
832
|
+
)
|
|
833
|
+
|
|
834
|
+
return build_index(
|
|
835
|
+
directory,
|
|
836
|
+
include_hidden=include_hidden,
|
|
837
|
+
respect_gitignore=respect_gitignore,
|
|
838
|
+
mode=mode_value,
|
|
839
|
+
recursive=recursive,
|
|
840
|
+
model_name=settings.model_name,
|
|
841
|
+
batch_size=settings.batch_size,
|
|
842
|
+
embed_concurrency=settings.embed_concurrency,
|
|
843
|
+
extract_concurrency=settings.extract_concurrency,
|
|
844
|
+
extract_backend=settings.extract_backend,
|
|
845
|
+
provider=settings.provider,
|
|
846
|
+
base_url=settings.base_url,
|
|
847
|
+
api_key=settings.api_key,
|
|
848
|
+
local_cuda=settings.local_cuda,
|
|
849
|
+
exclude_patterns=normalized_excludes,
|
|
850
|
+
extensions=normalized_exts,
|
|
851
|
+
)
|
|
852
|
+
|
|
853
|
+
|
|
854
|
+
def _index_in_memory_with_settings(
    *,
    path: Path | str,
    include_hidden: bool,
    respect_gitignore: bool,
    mode: str,
    recursive: bool,
    extensions: Sequence[str] | str | None,
    exclude_patterns: Sequence[str] | str | None,
    provider: str | None,
    model: str | None,
    batch_size: int | None,
    embed_concurrency: int | None,
    extract_concurrency: int | None,
    extract_backend: str | None,
    base_url: str | None,
    api_key: str | None,
    local_cuda: bool | None,
    use_config: bool,
    config: Config | Mapping[str, object] | str | None,
    no_cache: bool,
    runtime_config: Config | None,
    data_dir: Path | str | None,
    config_dir: Path | str | None,
    cache_dir: Path | str | None,
) -> InMemoryIndex:
    """Build an index in memory and wrap it in an ``InMemoryIndex``.

    All work runs inside ``_data_dir_context(data_dir, config_dir=...,
    cache_dir=...)`` (presumably scoping the directory overrides to this
    call -- confirm against that helper).

    The inputs are validated/normalized first (directory, mode, extensions,
    exclude patterns), then the effective settings are obtained from
    ``_resolve_settings`` and handed to ``build_index_in_memory``.  The
    returned paths, vectors and metadata are bundled together with the
    resolved provider and rerank settings.

    Raises:
        VexorError: if ``extensions`` was supplied (truthy) but normalizes
            to an empty result.
    """
    with _data_dir_context(data_dir, config_dir=config_dir, cache_dir=cache_dir):
        # Validation order is significant: it fixes which error surfaces first.
        root = resolve_directory(path)
        strategy_mode = _validate_mode(mode)
        ext_filter = _normalize_extensions(extensions)
        exclude_filter = _normalize_excludes(exclude_patterns)
        if extensions and not ext_filter:
            raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)

        resolved = _resolve_settings(
            provider=provider,
            model=model,
            batch_size=batch_size,
            embed_concurrency=embed_concurrency,
            extract_concurrency=extract_concurrency,
            extract_backend=extract_backend,
            base_url=base_url,
            api_key=api_key,
            local_cuda=local_cuda,
            auto_index=None,
            use_config=use_config,
            runtime_config=runtime_config,
            config_override=config,
        )

        # Provider/embedding options are needed both to build the index and
        # to describe it afterwards, so collect them once.
        provider_options = {
            "model_name": resolved.model_name,
            "batch_size": resolved.batch_size,
            "embed_concurrency": resolved.embed_concurrency,
            "provider": resolved.provider,
            "base_url": resolved.base_url,
            "api_key": resolved.api_key,
            "local_cuda": resolved.local_cuda,
        }

        built = build_index_in_memory(
            root,
            include_hidden=include_hidden,
            respect_gitignore=respect_gitignore,
            mode=strategy_mode,
            recursive=recursive,
            extract_concurrency=resolved.extract_concurrency,
            extract_backend=resolved.extract_backend,
            exclude_patterns=exclude_filter,
            extensions=ext_filter,
            no_cache=no_cache,
            **provider_options,
        )
        indexed_paths, embedding_vectors, index_metadata = built

        return InMemoryIndex(
            base_path=root,
            paths=indexed_paths,
            vectors=embedding_vectors,
            metadata=index_metadata,
            rerank=resolved.rerank,
            flashrank_model=resolved.flashrank_model,
            remote_rerank=resolved.remote_rerank,
            **provider_options,
        )
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
def _clear_index_with_settings(
    *,
    path: Path | str,
    include_hidden: bool,
    respect_gitignore: bool,
    mode: str,
    recursive: bool,
    extensions: Sequence[str] | str | None,
    exclude_patterns: Sequence[str] | str | None,
    data_dir: Path | str | None,
    config_dir: Path | str | None,
    cache_dir: Path | str | None,
) -> int:
    """Clear index entries matching the given directory and filters.

    Runs inside ``_data_dir_context(data_dir, config_dir=...,
    cache_dir=...)``.  The directory, mode, extensions and exclude patterns
    are validated/normalized, then forwarded to ``clear_index_entries``,
    whose integer result is returned unchanged (presumably the number of
    entries removed -- confirm against that function).

    Raises:
        VexorError: if ``extensions`` was supplied (truthy) but normalizes
            to an empty result.
    """
    with _data_dir_context(data_dir, config_dir=config_dir, cache_dir=cache_dir):
        # Validation order is significant: it fixes which error surfaces first.
        root = resolve_directory(path)
        strategy_mode = _validate_mode(mode)
        ext_filter = _normalize_extensions(extensions)
        exclude_filter = _normalize_excludes(exclude_patterns)
        if extensions and not ext_filter:
            raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)

        cleared = clear_index_entries(
            root,
            include_hidden=include_hidden,
            respect_gitignore=respect_gitignore,
            mode=strategy_mode,
            recursive=recursive,
            exclude_patterns=exclude_filter,
            extensions=ext_filter,
        )
        return cleared
|
|
972
|
+
|
|
973
|
+
|
|
264
974
|
def _validate_mode(mode: str) -> str:
|
|
265
975
|
try:
|
|
266
976
|
get_strategy(mode)
|