vexor 0.21.0__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vexor/api.py CHANGED
@@ -3,26 +3,42 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from dataclasses import dataclass
6
+ from contextlib import ExitStack, contextmanager
6
7
  from pathlib import Path
7
8
  from collections.abc import Mapping
8
9
  from typing import Sequence
9
10
 
11
+ import numpy as np
12
+
10
13
  from .config import (
11
14
  DEFAULT_BATCH_SIZE,
15
+ DEFAULT_EXTRACT_BACKEND,
16
+ DEFAULT_EXTRACT_CONCURRENCY,
12
17
  DEFAULT_PROVIDER,
13
18
  DEFAULT_RERANK,
14
19
  Config,
15
20
  RemoteRerankConfig,
16
21
  SUPPORTED_RERANKERS,
17
22
  config_from_json,
23
+ config_dir_context,
18
24
  load_config,
19
25
  resolve_default_model,
20
26
  set_config_dir,
21
27
  )
22
- from .cache import set_cache_dir
28
+ from .cache import cache_dir_context, set_cache_dir
23
29
  from .modes import available_modes, get_strategy
24
- from .services.index_service import IndexResult, build_index, clear_index_entries
25
- from .services.search_service import SearchRequest, SearchResponse, perform_search
30
+ from .services.index_service import (
31
+ IndexResult,
32
+ build_index,
33
+ build_index_in_memory,
34
+ clear_index_entries,
35
+ )
36
+ from .services.search_service import (
37
+ SearchRequest,
38
+ SearchResponse,
39
+ perform_search,
40
+ search_from_vectors,
41
+ )
26
42
  from .text import Messages
27
43
  from .utils import (
28
44
  ensure_positive,
@@ -53,9 +69,118 @@ class RuntimeSettings:
53
69
  remote_rerank: RemoteRerankConfig | None
54
70
 
55
71
 
72
@dataclass(slots=True)
class InMemoryIndex:
    """Index data held in memory together with the settings used to query it.

    ``search`` builds a ``SearchRequest`` from these fields and hands it,
    along with ``paths``/``vectors``/``metadata``, to ``search_from_vectors``.
    """

    # Directory passed to the search request as ``directory``.
    base_path: Path
    # Indexed paths, forwarded to ``search_from_vectors`` as ``paths``.
    paths: Sequence[Path]
    # Forwarded to ``search_from_vectors`` as ``file_vectors``.
    vectors: np.ndarray
    # Index metadata; ``search`` reads the keys "include_hidden",
    # "respect_gitignore", "mode", "recursive", "exclude_patterns" and
    # "extensions" from it.
    metadata: dict[str, object]
    # Embedding settings copied verbatim into every search request.
    model_name: str
    batch_size: int
    embed_concurrency: int
    provider: str
    base_url: str | None
    api_key: str | None
    local_cuda: bool
    # Rerank defaults; each can be overridden per ``search`` call.
    rerank: str = DEFAULT_RERANK
    flashrank_model: str | None = None
    remote_rerank: RemoteRerankConfig | None = None

    def search(
        self,
        query: str,
        *,
        top: int = 5,
        rerank: str | None = None,
        flashrank_model: str | None = None,
        remote_rerank: RemoteRerankConfig | None = None,
        no_cache: bool = True,
    ) -> SearchResponse:
        """Search against the in-memory index without touching disk.

        Args:
            query: Search text; surrounding whitespace is stripped.
            top: Number of results requested (passed on as ``top_k``).
            rerank: Reranker name; falls back to ``self.rerank`` and then
                ``DEFAULT_RERANK`` when falsy.
            flashrank_model: Overrides ``self.flashrank_model`` unless ``None``.
            remote_rerank: Overrides ``self.remote_rerank`` unless ``None``.
            no_cache: Forwarded unchanged to the search request.

        Returns:
            The response produced by ``search_from_vectors``.

        Raises:
            VexorError: If the stripped query is empty, or ``ensure_positive``
                rejects ``top`` with a ``ValueError``.
        """

        stripped = query.strip()
        if not stripped:
            raise VexorError(Messages.ERROR_EMPTY_QUERY)
        try:
            ensure_positive(top, "top")
        except ValueError as exc:
            raise VexorError(str(exc)) from exc

        # Normalize the reranker name; anything unsupported quietly becomes
        # the default rather than raising.
        reranker = (rerank or self.rerank or DEFAULT_RERANK).strip().lower()
        if reranker not in SUPPORTED_RERANKERS:
            reranker = DEFAULT_RERANK

        # Replay the traversal options recorded in the index metadata.
        meta = self.metadata
        hidden_flag = bool(meta.get("include_hidden", False))
        gitignore_flag = bool(meta.get("respect_gitignore", True))
        mode_name = str(meta.get("mode", "auto"))
        recursive_flag = bool(meta.get("recursive", True))
        excludes = tuple(meta.get("exclude_patterns") or ())
        exts = tuple(meta.get("extensions") or ())

        # Per-call rerank options win over the ones stored on the index.
        if flashrank_model is None:
            flashrank_model = self.flashrank_model
        if remote_rerank is None:
            remote_rerank = self.remote_rerank

        request = SearchRequest(
            query=stripped,
            directory=self.base_path,
            include_hidden=hidden_flag,
            respect_gitignore=gitignore_flag,
            mode=mode_name,
            recursive=recursive_flag,
            top_k=top,
            model_name=self.model_name,
            batch_size=self.batch_size,
            embed_concurrency=self.embed_concurrency,
            extract_concurrency=DEFAULT_EXTRACT_CONCURRENCY,
            extract_backend=DEFAULT_EXTRACT_BACKEND,
            provider=self.provider,
            base_url=self.base_url,
            api_key=self.api_key,
            local_cuda=self.local_cuda,
            exclude_patterns=excludes,
            extensions=exts,
            auto_index=False,
            temporary_index=True,
            no_cache=no_cache,
            rerank=reranker,
            flashrank_model=flashrank_model,
            remote_rerank=remote_rerank,
        )
        return search_from_vectors(
            request,
            paths=self.paths,
            file_vectors=self.vectors,
            metadata=self.metadata,
            is_stale=False,
        )
159
+
160
+
56
161
  _RUNTIME_CONFIG: Config | None = None
57
162
 
58
163
 
164
+ @contextmanager
165
+ def _data_dir_context(
166
+ data_dir: Path | str | None,
167
+ *,
168
+ config_dir: Path | str | None,
169
+ cache_dir: Path | str | None,
170
+ ):
171
+ if data_dir is None and config_dir is None and cache_dir is None:
172
+ yield
173
+ return
174
+ effective_config_dir = config_dir if config_dir is not None else data_dir
175
+ effective_cache_dir = cache_dir if cache_dir is not None else data_dir
176
+ with ExitStack() as stack:
177
+ if effective_config_dir is not None:
178
+ stack.enter_context(config_dir_context(effective_config_dir))
179
+ if effective_cache_dir is not None:
180
+ stack.enter_context(cache_dir_context(effective_cache_dir))
181
+ yield
182
+
183
+
59
184
  def set_data_dir(path: Path | str | None) -> None:
60
185
  """Set the base directory for config and cache data."""
61
186
  set_config_dir(path)
@@ -77,6 +202,302 @@ def set_config_json(
77
202
  raise VexorError(str(exc)) from exc
78
203
 
79
204
 
205
class VexorClient:
    """Session-style API wrapper for library use.

    A client stores default directory overrides, a default ``use_config``
    flag and an optional in-memory config. Every operation accepts per-call
    values that take precedence over those stored defaults.
    """

    def __init__(
        self,
        *,
        data_dir: Path | str | None = None,
        config_dir: Path | str | None = None,
        cache_dir: Path | str | None = None,
        use_config: bool = True,
    ) -> None:
        """Record the client-wide defaults and start with no in-memory config."""
        self.data_dir = data_dir
        self.config_dir = config_dir
        self.cache_dir = cache_dir
        self.use_config = use_config
        self._runtime_config: Config | None = None

    @staticmethod
    def _resolve_path(path: Path | str | None) -> Path | str:
        """Return ``path``, or the current working directory when it is ``None``.

        The working directory is looked up on every call. A ``Path.cwd()``
        default in a signature would instead be frozen at definition time.
        """
        return Path.cwd() if path is None else path

    def set_config_json(
        self,
        payload: Mapping[str, object] | str | None,
        *,
        replace: bool = False,
    ) -> None:
        """Set in-memory config for this client from a JSON string or mapping.

        Args:
            payload: Config data; ``None`` clears the in-memory config.
            replace: When true the payload is parsed with no base config;
                otherwise it is layered on the current in-memory config, or
                on the loaded config when none is set.

        Raises:
            VexorError: If ``config_from_json`` rejects the payload with a
                ``ValueError``.
        """
        if payload is None:
            self._runtime_config = None
            return
        base = None
        if not replace:
            base = self._runtime_config
            if not base:
                # Load the base config under this client's directory
                # overrides so it comes from the same location the client's
                # operations use, not the process-wide default.
                with _data_dir_context(
                    self.data_dir,
                    config_dir=self.config_dir,
                    cache_dir=self.cache_dir,
                ):
                    base = load_config()
        try:
            self._runtime_config = config_from_json(payload, base=base)
        except ValueError as exc:
            raise VexorError(str(exc)) from exc

    @contextmanager
    def config_context(
        self,
        payload: Mapping[str, object] | str | None,
        *,
        replace: bool = False,
    ):
        """Temporarily override this client's in-memory config.

        Yields the client itself; the previous in-memory config is restored
        on exit, including when the body raises.
        """
        previous = self._runtime_config
        self.set_config_json(payload, replace=replace)
        try:
            yield self
        finally:
            self._runtime_config = previous

    def _resolve_dir_overrides(
        self,
        data_dir: Path | str | None,
        config_dir: Path | str | None,
        cache_dir: Path | str | None,
    ) -> tuple[Path | str | None, Path | str | None, Path | str | None]:
        """Return ``(data_dir, config_dir, cache_dir)``, preferring per-call values.

        Each ``None`` argument is replaced by the matching client attribute.
        """
        resolved_data_dir = data_dir if data_dir is not None else self.data_dir
        resolved_config_dir = config_dir if config_dir is not None else self.config_dir
        resolved_cache_dir = cache_dir if cache_dir is not None else self.cache_dir
        return resolved_data_dir, resolved_config_dir, resolved_cache_dir

    def search(
        self,
        query: str,
        *,
        path: Path | str | None = None,
        top: int = 5,
        include_hidden: bool = False,
        respect_gitignore: bool = True,
        mode: str = "auto",
        recursive: bool = True,
        extensions: Sequence[str] | str | None = None,
        exclude_patterns: Sequence[str] | str | None = None,
        provider: str | None = None,
        model: str | None = None,
        batch_size: int | None = None,
        embed_concurrency: int | None = None,
        extract_concurrency: int | None = None,
        extract_backend: str | None = None,
        base_url: str | None = None,
        api_key: str | None = None,
        local_cuda: bool | None = None,
        auto_index: bool | None = None,
        use_config: bool | None = None,
        config: Config | Mapping[str, object] | str | None = None,
        temporary_index: bool = False,
        no_cache: bool = False,
        data_dir: Path | str | None = None,
        config_dir: Path | str | None = None,
        cache_dir: Path | str | None = None,
    ) -> SearchResponse:
        """Run a semantic search and return ranked results.

        ``path`` defaults to the working directory at call time. ``use_config``
        and the three directory arguments fall back to the client's stored
        values when ``None``. Everything else, plus the client's in-memory
        config, is forwarded to ``_search_with_settings``.
        """

        resolved_use_config = self.use_config if use_config is None else use_config
        resolved_data_dir, resolved_config_dir, resolved_cache_dir = (
            self._resolve_dir_overrides(data_dir, config_dir, cache_dir)
        )
        return _search_with_settings(
            query,
            path=self._resolve_path(path),
            top=top,
            include_hidden=include_hidden,
            respect_gitignore=respect_gitignore,
            mode=mode,
            recursive=recursive,
            extensions=extensions,
            exclude_patterns=exclude_patterns,
            provider=provider,
            model=model,
            batch_size=batch_size,
            embed_concurrency=embed_concurrency,
            extract_concurrency=extract_concurrency,
            extract_backend=extract_backend,
            base_url=base_url,
            api_key=api_key,
            local_cuda=local_cuda,
            auto_index=auto_index,
            use_config=resolved_use_config,
            config=config,
            temporary_index=temporary_index,
            no_cache=no_cache,
            runtime_config=self._runtime_config,
            data_dir=resolved_data_dir,
            config_dir=resolved_config_dir,
            cache_dir=resolved_cache_dir,
        )

    def index(
        self,
        path: Path | str | None = None,
        *,
        include_hidden: bool = False,
        respect_gitignore: bool = True,
        mode: str = "auto",
        recursive: bool = True,
        extensions: Sequence[str] | str | None = None,
        exclude_patterns: Sequence[str] | str | None = None,
        provider: str | None = None,
        model: str | None = None,
        batch_size: int | None = None,
        embed_concurrency: int | None = None,
        extract_concurrency: int | None = None,
        extract_backend: str | None = None,
        base_url: str | None = None,
        api_key: str | None = None,
        local_cuda: bool | None = None,
        use_config: bool | None = None,
        config: Config | Mapping[str, object] | str | None = None,
        data_dir: Path | str | None = None,
        config_dir: Path | str | None = None,
        cache_dir: Path | str | None = None,
    ) -> IndexResult:
        """Build or refresh the index for the given directory.

        ``path`` defaults to the working directory at call time. ``use_config``
        and the directory arguments fall back to the client's stored values
        when ``None``. The call is delegated to ``_index_with_settings``.
        """

        resolved_use_config = self.use_config if use_config is None else use_config
        resolved_data_dir, resolved_config_dir, resolved_cache_dir = (
            self._resolve_dir_overrides(data_dir, config_dir, cache_dir)
        )
        return _index_with_settings(
            path=self._resolve_path(path),
            include_hidden=include_hidden,
            respect_gitignore=respect_gitignore,
            mode=mode,
            recursive=recursive,
            extensions=extensions,
            exclude_patterns=exclude_patterns,
            provider=provider,
            model=model,
            batch_size=batch_size,
            embed_concurrency=embed_concurrency,
            extract_concurrency=extract_concurrency,
            extract_backend=extract_backend,
            base_url=base_url,
            api_key=api_key,
            local_cuda=local_cuda,
            use_config=resolved_use_config,
            config=config,
            runtime_config=self._runtime_config,
            data_dir=resolved_data_dir,
            config_dir=resolved_config_dir,
            cache_dir=resolved_cache_dir,
        )

    def index_in_memory(
        self,
        path: Path | str | None = None,
        *,
        include_hidden: bool = False,
        respect_gitignore: bool = True,
        mode: str = "auto",
        recursive: bool = True,
        extensions: Sequence[str] | str | None = None,
        exclude_patterns: Sequence[str] | str | None = None,
        provider: str | None = None,
        model: str | None = None,
        batch_size: int | None = None,
        embed_concurrency: int | None = None,
        extract_concurrency: int | None = None,
        extract_backend: str | None = None,
        base_url: str | None = None,
        api_key: str | None = None,
        local_cuda: bool | None = None,
        use_config: bool | None = None,
        config: Config | Mapping[str, object] | str | None = None,
        no_cache: bool = True,
        data_dir: Path | str | None = None,
        config_dir: Path | str | None = None,
        cache_dir: Path | str | None = None,
    ) -> InMemoryIndex:
        """Build an index in memory without writing to disk.

        ``path`` defaults to the working directory at call time. ``use_config``
        and the directory arguments fall back to the client's stored values
        when ``None``. The call is delegated to
        ``_index_in_memory_with_settings``.
        """

        resolved_use_config = self.use_config if use_config is None else use_config
        resolved_data_dir, resolved_config_dir, resolved_cache_dir = (
            self._resolve_dir_overrides(data_dir, config_dir, cache_dir)
        )
        return _index_in_memory_with_settings(
            path=self._resolve_path(path),
            include_hidden=include_hidden,
            respect_gitignore=respect_gitignore,
            mode=mode,
            recursive=recursive,
            extensions=extensions,
            exclude_patterns=exclude_patterns,
            provider=provider,
            model=model,
            batch_size=batch_size,
            embed_concurrency=embed_concurrency,
            extract_concurrency=extract_concurrency,
            extract_backend=extract_backend,
            base_url=base_url,
            api_key=api_key,
            local_cuda=local_cuda,
            use_config=resolved_use_config,
            config=config,
            no_cache=no_cache,
            runtime_config=self._runtime_config,
            data_dir=resolved_data_dir,
            config_dir=resolved_config_dir,
            cache_dir=resolved_cache_dir,
        )

    def clear_index(
        self,
        path: Path | str | None = None,
        *,
        include_hidden: bool = False,
        respect_gitignore: bool = True,
        mode: str = "auto",
        recursive: bool = True,
        extensions: Sequence[str] | str | None = None,
        exclude_patterns: Sequence[str] | str | None = None,
        data_dir: Path | str | None = None,
        config_dir: Path | str | None = None,
        cache_dir: Path | str | None = None,
    ) -> int:
        """Clear cached index entries for the given directory.

        ``path`` defaults to the working directory at call time. The directory
        arguments fall back to the client's stored values when ``None``. The
        call is delegated to ``_clear_index_with_settings``.
        """

        resolved_data_dir, resolved_config_dir, resolved_cache_dir = (
            self._resolve_dir_overrides(data_dir, config_dir, cache_dir)
        )
        return _clear_index_with_settings(
            path=self._resolve_path(path),
            include_hidden=include_hidden,
            respect_gitignore=respect_gitignore,
            mode=mode,
            recursive=recursive,
            extensions=extensions,
            exclude_patterns=exclude_patterns,
            data_dir=resolved_data_dir,
            config_dir=resolved_config_dir,
            cache_dir=resolved_cache_dir,
        )
475
+
476
+
477
@contextmanager
def config_context(
    payload: Mapping[str, object] | str | None,
    *,
    replace: bool = False,
    data_dir: Path | str | None = None,
    config_dir: Path | str | None = None,
    cache_dir: Path | str | None = None,
    use_config: bool = True,
):
    """Yield a configured client for scoped API usage.

    A new ``VexorClient`` is created with the given directory overrides and
    ``use_config`` flag, then given ``payload`` as its in-memory config. That
    in-memory config is cleared again on exit, including when the body raises.
    """
    scoped_client = VexorClient(
        use_config=use_config,
        cache_dir=cache_dir,
        config_dir=config_dir,
        data_dir=data_dir,
    )
    scoped_client.set_config_json(payload, replace=replace)
    try:
        yield scoped_client
    finally:
        # Passing None drops the client's in-memory config.
        scoped_client.set_config_json(None)
499
+
500
+
80
501
  def search(
81
502
  query: str,
82
503
  *,
@@ -102,25 +523,21 @@ def search(
102
523
  config: Config | Mapping[str, object] | str | None = None,
103
524
  temporary_index: bool = False,
104
525
  no_cache: bool = False,
526
+ data_dir: Path | str | None = None,
527
+ config_dir: Path | str | None = None,
528
+ cache_dir: Path | str | None = None,
105
529
  ) -> SearchResponse:
106
530
  """Run a semantic search and return ranked results."""
107
-
108
- clean_query = query.strip()
109
- if not clean_query:
110
- raise VexorError(Messages.ERROR_EMPTY_QUERY)
111
- try:
112
- ensure_positive(top, "top")
113
- except ValueError as exc:
114
- raise VexorError(str(exc)) from exc
115
-
116
- directory = resolve_directory(path)
117
- mode_value = _validate_mode(mode)
118
- normalized_exts = _normalize_extensions(extensions)
119
- normalized_excludes = _normalize_excludes(exclude_patterns)
120
- if extensions and not normalized_exts:
121
- raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)
122
-
123
- settings = _resolve_settings(
531
+ return _search_with_settings(
532
+ query,
533
+ path=path,
534
+ top=top,
535
+ include_hidden=include_hidden,
536
+ respect_gitignore=respect_gitignore,
537
+ mode=mode,
538
+ recursive=recursive,
539
+ extensions=extensions,
540
+ exclude_patterns=exclude_patterns,
124
541
  provider=provider,
125
542
  model=model,
126
543
  batch_size=batch_size,
@@ -132,37 +549,14 @@ def search(
132
549
  local_cuda=local_cuda,
133
550
  auto_index=auto_index,
134
551
  use_config=use_config,
135
- runtime_config=_RUNTIME_CONFIG,
136
- config_override=config,
137
- )
138
-
139
- request = SearchRequest(
140
- query=clean_query,
141
- directory=directory,
142
- include_hidden=include_hidden,
143
- respect_gitignore=respect_gitignore,
144
- mode=mode_value,
145
- recursive=recursive,
146
- top_k=top,
147
- model_name=settings.model_name,
148
- batch_size=settings.batch_size,
149
- embed_concurrency=settings.embed_concurrency,
150
- extract_concurrency=settings.extract_concurrency,
151
- extract_backend=settings.extract_backend,
152
- provider=settings.provider,
153
- base_url=settings.base_url,
154
- api_key=settings.api_key,
155
- local_cuda=settings.local_cuda,
156
- exclude_patterns=normalized_excludes,
157
- extensions=normalized_exts,
158
- auto_index=settings.auto_index,
552
+ config=config,
159
553
  temporary_index=temporary_index,
160
554
  no_cache=no_cache,
161
- rerank=settings.rerank,
162
- flashrank_model=settings.flashrank_model,
163
- remote_rerank=settings.remote_rerank,
555
+ runtime_config=_RUNTIME_CONFIG,
556
+ data_dir=data_dir,
557
+ config_dir=config_dir,
558
+ cache_dir=cache_dir,
164
559
  )
165
- return perform_search(request)
166
560
 
167
561
 
168
562
  def index(
@@ -185,17 +579,19 @@ def index(
185
579
  local_cuda: bool | None = None,
186
580
  use_config: bool = True,
187
581
  config: Config | Mapping[str, object] | str | None = None,
582
+ data_dir: Path | str | None = None,
583
+ config_dir: Path | str | None = None,
584
+ cache_dir: Path | str | None = None,
188
585
  ) -> IndexResult:
189
586
  """Build or refresh the index for the given directory."""
190
-
191
- directory = resolve_directory(path)
192
- mode_value = _validate_mode(mode)
193
- normalized_exts = _normalize_extensions(extensions)
194
- normalized_excludes = _normalize_excludes(exclude_patterns)
195
- if extensions and not normalized_exts:
196
- raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)
197
-
198
- settings = _resolve_settings(
587
+ return _index_with_settings(
588
+ path=path,
589
+ include_hidden=include_hidden,
590
+ respect_gitignore=respect_gitignore,
591
+ mode=mode,
592
+ recursive=recursive,
593
+ extensions=extensions,
594
+ exclude_patterns=exclude_patterns,
199
595
  provider=provider,
200
596
  model=model,
201
597
  batch_size=batch_size,
@@ -205,29 +601,65 @@ def index(
205
601
  base_url=base_url,
206
602
  api_key=api_key,
207
603
  local_cuda=local_cuda,
208
- auto_index=None,
209
604
  use_config=use_config,
605
+ config=config,
210
606
  runtime_config=_RUNTIME_CONFIG,
211
- config_override=config,
607
+ data_dir=data_dir,
608
+ config_dir=config_dir,
609
+ cache_dir=cache_dir,
212
610
  )
213
611
 
214
- return build_index(
215
- directory,
612
+
613
def index_in_memory(
    path: Path | str | None = None,
    *,
    include_hidden: bool = False,
    respect_gitignore: bool = True,
    mode: str = "auto",
    recursive: bool = True,
    extensions: Sequence[str] | str | None = None,
    exclude_patterns: Sequence[str] | str | None = None,
    provider: str | None = None,
    model: str | None = None,
    batch_size: int | None = None,
    embed_concurrency: int | None = None,
    extract_concurrency: int | None = None,
    extract_backend: str | None = None,
    base_url: str | None = None,
    api_key: str | None = None,
    local_cuda: bool | None = None,
    use_config: bool = True,
    config: Config | Mapping[str, object] | str | None = None,
    no_cache: bool = True,
    data_dir: Path | str | None = None,
    config_dir: Path | str | None = None,
    cache_dir: Path | str | None = None,
) -> InMemoryIndex:
    """Build an index in memory without writing to disk.

    Args:
        path: Directory to index. ``None`` (the default) means the current
            working directory, looked up when the function is called.

    All other arguments are forwarded unchanged to
    ``_index_in_memory_with_settings``, together with the module-level
    ``_RUNTIME_CONFIG``.

    Returns:
        The ``InMemoryIndex`` built by ``_index_in_memory_with_settings``.
    """
    # Resolve the default here, not in the signature: a ``Path.cwd()``
    # default is evaluated once at definition time and would go stale if the
    # process later changes directory.
    target = Path.cwd() if path is None else path
    return _index_in_memory_with_settings(
        path=target,
        include_hidden=include_hidden,
        respect_gitignore=respect_gitignore,
        mode=mode,
        recursive=recursive,
        extensions=extensions,
        exclude_patterns=exclude_patterns,
        provider=provider,
        model=model,
        batch_size=batch_size,
        embed_concurrency=embed_concurrency,
        extract_concurrency=extract_concurrency,
        extract_backend=extract_backend,
        base_url=base_url,
        api_key=api_key,
        local_cuda=local_cuda,
        use_config=use_config,
        config=config,
        no_cache=no_cache,
        runtime_config=_RUNTIME_CONFIG,
        data_dir=data_dir,
        config_dir=config_dir,
        cache_dir=cache_dir,
    )
232
664
 
233
665
 
@@ -240,27 +672,305 @@ def clear_index(
240
672
  recursive: bool = True,
241
673
  extensions: Sequence[str] | str | None = None,
242
674
  exclude_patterns: Sequence[str] | str | None = None,
675
+ data_dir: Path | str | None = None,
676
+ config_dir: Path | str | None = None,
677
+ cache_dir: Path | str | None = None,
243
678
  ) -> int:
244
679
  """Clear cached index entries for the given directory."""
245
-
246
- directory = resolve_directory(path)
247
- mode_value = _validate_mode(mode)
248
- normalized_exts = _normalize_extensions(extensions)
249
- normalized_excludes = _normalize_excludes(exclude_patterns)
250
- if extensions and not normalized_exts:
251
- raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)
252
-
253
- return clear_index_entries(
254
- directory,
680
+ return _clear_index_with_settings(
681
+ path=path,
255
682
  include_hidden=include_hidden,
256
683
  respect_gitignore=respect_gitignore,
257
- mode=mode_value,
684
+ mode=mode,
258
685
  recursive=recursive,
259
- exclude_patterns=normalized_excludes,
260
- extensions=normalized_exts,
686
+ extensions=extensions,
687
+ exclude_patterns=exclude_patterns,
688
+ data_dir=data_dir,
689
+ config_dir=config_dir,
690
+ cache_dir=cache_dir,
261
691
  )
262
692
 
263
693
 
694
def _search_with_settings(
    query: str,
    *,
    path: Path | str,
    top: int,
    include_hidden: bool,
    respect_gitignore: bool,
    mode: str,
    recursive: bool,
    extensions: Sequence[str] | str | None,
    exclude_patterns: Sequence[str] | str | None,
    provider: str | None,
    model: str | None,
    batch_size: int | None,
    embed_concurrency: int | None,
    extract_concurrency: int | None,
    extract_backend: str | None,
    base_url: str | None,
    api_key: str | None,
    local_cuda: bool | None,
    auto_index: bool | None,
    use_config: bool,
    config: Config | Mapping[str, object] | str | None,
    temporary_index: bool,
    no_cache: bool,
    runtime_config: Config | None,
    data_dir: Path | str | None,
    config_dir: Path | str | None,
    cache_dir: Path | str | None,
) -> SearchResponse:
    """Validate the inputs, resolve settings, and run a search.

    Everything runs inside ``_data_dir_context`` so the directory overrides
    stay active for the whole call.

    Raises:
        VexorError: If the stripped query is empty, ``ensure_positive``
            rejects ``top`` with a ``ValueError``, or ``extensions`` was
            supplied but normalizes to nothing.
    """
    with _data_dir_context(data_dir, config_dir=config_dir, cache_dir=cache_dir):
        stripped = query.strip()
        if not stripped:
            raise VexorError(Messages.ERROR_EMPTY_QUERY)
        try:
            ensure_positive(top, "top")
        except ValueError as exc:
            raise VexorError(str(exc)) from exc

        root = resolve_directory(path)
        checked_mode = _validate_mode(mode)
        ext_filter = _normalize_extensions(extensions)
        exclude_filter = _normalize_excludes(exclude_patterns)
        # A non-empty ``extensions`` argument that normalizes to nothing is
        # rejected outright.
        if extensions and not ext_filter:
            raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)

        settings = _resolve_settings(
            provider=provider,
            model=model,
            batch_size=batch_size,
            embed_concurrency=embed_concurrency,
            extract_concurrency=extract_concurrency,
            extract_backend=extract_backend,
            base_url=base_url,
            api_key=api_key,
            local_cuda=local_cuda,
            auto_index=auto_index,
            use_config=use_config,
            runtime_config=runtime_config,
            config_override=config,
        )

        # Fields taken from the resolved settings, not from the raw arguments.
        resolved_options = {
            "model_name": settings.model_name,
            "batch_size": settings.batch_size,
            "embed_concurrency": settings.embed_concurrency,
            "extract_concurrency": settings.extract_concurrency,
            "extract_backend": settings.extract_backend,
            "provider": settings.provider,
            "base_url": settings.base_url,
            "api_key": settings.api_key,
            "local_cuda": settings.local_cuda,
            "auto_index": settings.auto_index,
            "rerank": settings.rerank,
            "flashrank_model": settings.flashrank_model,
            "remote_rerank": settings.remote_rerank,
        }
        return perform_search(
            SearchRequest(
                query=stripped,
                directory=root,
                include_hidden=include_hidden,
                respect_gitignore=respect_gitignore,
                mode=checked_mode,
                recursive=recursive,
                top_k=top,
                exclude_patterns=exclude_filter,
                extensions=ext_filter,
                temporary_index=temporary_index,
                no_cache=no_cache,
                **resolved_options,
            )
        )
783
+
784
+
785
def _index_with_settings(
    *,
    path: Path | str,
    include_hidden: bool,
    respect_gitignore: bool,
    mode: str,
    recursive: bool,
    extensions: Sequence[str] | str | None,
    exclude_patterns: Sequence[str] | str | None,
    provider: str | None,
    model: str | None,
    batch_size: int | None,
    embed_concurrency: int | None,
    extract_concurrency: int | None,
    extract_backend: str | None,
    base_url: str | None,
    api_key: str | None,
    local_cuda: bool | None,
    use_config: bool,
    config: Config | Mapping[str, object] | str | None,
    runtime_config: Config | None,
    data_dir: Path | str | None,
    config_dir: Path | str | None,
    cache_dir: Path | str | None,
) -> IndexResult:
    """Validate the inputs, resolve settings, and call ``build_index``.

    Everything runs inside ``_data_dir_context`` so the directory overrides
    stay active for the whole call.

    Raises:
        VexorError: If ``extensions`` was supplied but normalizes to nothing.
    """
    with _data_dir_context(data_dir, config_dir=config_dir, cache_dir=cache_dir):
        root = resolve_directory(path)
        checked_mode = _validate_mode(mode)
        ext_filter = _normalize_extensions(extensions)
        exclude_filter = _normalize_excludes(exclude_patterns)
        if extensions and not ext_filter:
            raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)

        # ``auto_index`` is always passed as None here.
        settings = _resolve_settings(
            provider=provider,
            model=model,
            batch_size=batch_size,
            embed_concurrency=embed_concurrency,
            extract_concurrency=extract_concurrency,
            extract_backend=extract_backend,
            base_url=base_url,
            api_key=api_key,
            local_cuda=local_cuda,
            auto_index=None,
            use_config=use_config,
            runtime_config=runtime_config,
            config_override=config,
        )

        # Fields taken from the resolved settings, not from the raw arguments.
        resolved_options = {
            "model_name": settings.model_name,
            "batch_size": settings.batch_size,
            "embed_concurrency": settings.embed_concurrency,
            "extract_concurrency": settings.extract_concurrency,
            "extract_backend": settings.extract_backend,
            "provider": settings.provider,
            "base_url": settings.base_url,
            "api_key": settings.api_key,
            "local_cuda": settings.local_cuda,
        }
        return build_index(
            root,
            include_hidden=include_hidden,
            respect_gitignore=respect_gitignore,
            mode=checked_mode,
            recursive=recursive,
            exclude_patterns=exclude_filter,
            extensions=ext_filter,
            **resolved_options,
        )
852
+
853
+
854
def _index_in_memory_with_settings(
    *,
    path: Path | str,
    include_hidden: bool,
    respect_gitignore: bool,
    mode: str,
    recursive: bool,
    extensions: Sequence[str] | str | None,
    exclude_patterns: Sequence[str] | str | None,
    provider: str | None,
    model: str | None,
    batch_size: int | None,
    embed_concurrency: int | None,
    extract_concurrency: int | None,
    extract_backend: str | None,
    base_url: str | None,
    api_key: str | None,
    local_cuda: bool | None,
    use_config: bool,
    config: Config | Mapping[str, object] | str | None,
    no_cache: bool,
    runtime_config: Config | None,
    data_dir: Path | str | None,
    config_dir: Path | str | None,
    cache_dir: Path | str | None,
) -> InMemoryIndex:
    """Validate the inputs, build an in-memory index, and wrap it.

    Everything runs inside ``_data_dir_context`` so the directory overrides
    stay active for the whole call. The paths, vectors and metadata returned
    by ``build_index_in_memory`` are packaged into an ``InMemoryIndex``
    together with the resolved embedding and rerank settings.

    Raises:
        VexorError: If ``extensions`` was supplied but normalizes to nothing.
    """
    with _data_dir_context(data_dir, config_dir=config_dir, cache_dir=cache_dir):
        root = resolve_directory(path)
        checked_mode = _validate_mode(mode)
        ext_filter = _normalize_extensions(extensions)
        exclude_filter = _normalize_excludes(exclude_patterns)
        if extensions and not ext_filter:
            raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)

        # ``auto_index`` is always passed as None here.
        settings = _resolve_settings(
            provider=provider,
            model=model,
            batch_size=batch_size,
            embed_concurrency=embed_concurrency,
            extract_concurrency=extract_concurrency,
            extract_backend=extract_backend,
            base_url=base_url,
            api_key=api_key,
            local_cuda=local_cuda,
            auto_index=None,
            use_config=use_config,
            runtime_config=runtime_config,
            config_override=config,
        )

        # Settings needed both to build the index and to query it later.
        shared_options = {
            "model_name": settings.model_name,
            "batch_size": settings.batch_size,
            "embed_concurrency": settings.embed_concurrency,
            "provider": settings.provider,
            "base_url": settings.base_url,
            "api_key": settings.api_key,
            "local_cuda": settings.local_cuda,
        }

        indexed_paths, indexed_vectors, index_metadata = build_index_in_memory(
            root,
            include_hidden=include_hidden,
            respect_gitignore=respect_gitignore,
            mode=checked_mode,
            recursive=recursive,
            extract_concurrency=settings.extract_concurrency,
            extract_backend=settings.extract_backend,
            exclude_patterns=exclude_filter,
            extensions=ext_filter,
            no_cache=no_cache,
            **shared_options,
        )

        return InMemoryIndex(
            base_path=root,
            paths=indexed_paths,
            vectors=indexed_vectors,
            metadata=index_metadata,
            rerank=settings.rerank,
            flashrank_model=settings.flashrank_model,
            remote_rerank=settings.remote_rerank,
            **shared_options,
        )
940
+
941
+
942
def _clear_index_with_settings(
    *,
    path: Path | str,
    include_hidden: bool,
    respect_gitignore: bool,
    mode: str,
    recursive: bool,
    extensions: Sequence[str] | str | None,
    exclude_patterns: Sequence[str] | str | None,
    data_dir: Path | str | None,
    config_dir: Path | str | None,
    cache_dir: Path | str | None,
) -> int:
    """Validate the inputs and call ``clear_index_entries``.

    Everything runs inside ``_data_dir_context`` so the directory overrides
    stay active for the whole call.

    Returns:
        Whatever ``clear_index_entries`` returns.

    Raises:
        VexorError: If ``extensions`` was supplied but normalizes to nothing.
    """
    with _data_dir_context(data_dir, config_dir=config_dir, cache_dir=cache_dir):
        root = resolve_directory(path)
        checked_mode = _validate_mode(mode)
        ext_filter = _normalize_extensions(extensions)
        exclude_filter = _normalize_excludes(exclude_patterns)
        if extensions and not ext_filter:
            raise VexorError(Messages.ERROR_EXTENSIONS_EMPTY)

        selection = {
            "include_hidden": include_hidden,
            "respect_gitignore": respect_gitignore,
            "mode": checked_mode,
            "recursive": recursive,
            "exclude_patterns": exclude_filter,
            "extensions": ext_filter,
        }
        return clear_index_entries(root, **selection)
972
+
973
+
264
974
  def _validate_mode(mode: str) -> str:
265
975
  try:
266
976
  get_strategy(mode)