vexor 0.20.0-py3-none-any.whl → 0.21.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vexor/__init__.py +1 -1
- vexor/api.py +26 -0
- vexor/cache.py +525 -286
- vexor/cli.py +53 -0
- vexor/config.py +54 -1
- vexor/providers/gemini.py +79 -13
- vexor/providers/openai.py +79 -13
- vexor/services/config_service.py +14 -0
- vexor/services/index_service.py +132 -5
- vexor/services/search_service.py +94 -27
- vexor/text.py +10 -0
- {vexor-0.20.0.dist-info → vexor-0.21.1.dist-info}/METADATA +15 -13
- {vexor-0.20.0.dist-info → vexor-0.21.1.dist-info}/RECORD +16 -16
- {vexor-0.20.0.dist-info → vexor-0.21.1.dist-info}/WHEEL +0 -0
- {vexor-0.20.0.dist-info → vexor-0.21.1.dist-info}/entry_points.txt +0 -0
- {vexor-0.20.0.dist-info → vexor-0.21.1.dist-info}/licenses/LICENSE +0 -0
vexor/cli.py
CHANGED
@@ -31,6 +31,7 @@ from .config import (
     DEFAULT_MODEL,
     DEFAULT_PROVIDER,
     DEFAULT_RERANK,
+    SUPPORTED_EXTRACT_BACKENDS,
     SUPPORTED_PROVIDERS,
     SUPPORTED_RERANKERS,
     flashrank_cache_dir,
@@ -401,6 +402,8 @@ def search(
     model_name = resolve_default_model(provider, config.model)
     batch_size = config.batch_size if config.batch_size is not None else DEFAULT_BATCH_SIZE
     embed_concurrency = config.embed_concurrency
+    extract_concurrency = config.extract_concurrency
+    extract_backend = config.extract_backend
     base_url = config.base_url
     api_key = config.api_key
     auto_index = bool(config.auto_index)
@@ -438,6 +441,8 @@ def search(
         model_name=model_name,
         batch_size=batch_size,
         embed_concurrency=embed_concurrency,
+        extract_concurrency=extract_concurrency,
+        extract_backend=extract_backend,
         provider=provider,
         base_url=base_url,
         api_key=api_key,
@@ -577,6 +582,8 @@ def index(
     model_name = resolve_default_model(provider, config.model)
     batch_size = config.batch_size if config.batch_size is not None else DEFAULT_BATCH_SIZE
     embed_concurrency = config.embed_concurrency
+    extract_concurrency = config.extract_concurrency
+    extract_backend = config.extract_backend
     base_url = config.base_url
     api_key = config.api_key
 
@@ -673,6 +680,8 @@ def index(
         model_name=model_name,
         batch_size=batch_size,
         embed_concurrency=embed_concurrency,
+        extract_concurrency=extract_concurrency,
+        extract_backend=extract_backend,
         provider=provider,
         base_url=base_url,
         api_key=api_key,
@@ -734,6 +743,16 @@ def config(
         "--set-embed-concurrency",
         help=Messages.HELP_SET_EMBED_CONCURRENCY,
     ),
+    set_extract_concurrency_option: int | None = typer.Option(
+        None,
+        "--set-extract-concurrency",
+        help=Messages.HELP_SET_EXTRACT_CONCURRENCY,
+    ),
+    set_extract_backend_option: str | None = typer.Option(
+        None,
+        "--set-extract-backend",
+        help=Messages.HELP_SET_EXTRACT_BACKEND,
+    ),
     set_provider_option: str | None = typer.Option(
         None,
         "--set-provider",
@@ -810,6 +829,8 @@ def config(
         raise typer.BadParameter(Messages.ERROR_BATCH_NEGATIVE)
     if set_embed_concurrency_option is not None and set_embed_concurrency_option < 1:
         raise typer.BadParameter(Messages.ERROR_CONCURRENCY_INVALID)
+    if set_extract_concurrency_option is not None and set_extract_concurrency_option < 1:
+        raise typer.BadParameter(Messages.ERROR_EXTRACT_CONCURRENCY_INVALID)
     if set_base_url_option and clear_base_url:
         raise typer.BadParameter(Messages.ERROR_BASE_URL_CONFLICT)
     flashrank_model_reset = False
@@ -835,6 +856,16 @@ def config(
         if not normalized_remote_key:
             raise typer.BadParameter(Messages.ERROR_REMOTE_RERANK_API_KEY_EMPTY)
         set_remote_rerank_api_key_option = normalized_remote_key
+    if set_extract_backend_option is not None:
+        normalized_backend = set_extract_backend_option.strip().lower()
+        if normalized_backend not in SUPPORTED_EXTRACT_BACKENDS:
+            allowed = ", ".join(SUPPORTED_EXTRACT_BACKENDS)
+            raise typer.BadParameter(
+                Messages.ERROR_EXTRACT_BACKEND_INVALID.format(
+                    value=set_extract_backend_option, allowed=allowed
+                )
+            )
+        set_extract_backend_option = normalized_backend
     if clear_remote_rerank and any(
         (
             set_remote_rerank_url_option is not None,
@@ -850,6 +881,8 @@ def config(
             set_model_option is not None,
             set_batch_option is not None,
             set_embed_concurrency_option is not None,
+            set_extract_concurrency_option is not None,
+            set_extract_backend_option is not None,
             set_provider_option is not None,
             set_base_url_option is not None,
             clear_base_url,
@@ -962,6 +995,8 @@ def config(
         model=set_model_option,
         batch_size=set_batch_option,
         embed_concurrency=set_embed_concurrency_option,
+        extract_concurrency=set_extract_concurrency_option,
+        extract_backend=set_extract_backend_option,
         provider=set_provider_option,
         base_url=set_base_url_option,
         clear_base_url=clear_base_url,
@@ -993,6 +1028,22 @@ def config(
                 Styles.SUCCESS,
             )
         )
+    if updates.extract_concurrency_set and set_extract_concurrency_option is not None:
+        console.print(
+            _styled(
+                Messages.INFO_EXTRACT_CONCURRENCY_SET.format(
+                    value=set_extract_concurrency_option
+                ),
+                Styles.SUCCESS,
+            )
+        )
+    if updates.extract_backend_set and set_extract_backend_option is not None:
+        console.print(
+            _styled(
+                Messages.INFO_EXTRACT_BACKEND_SET.format(value=set_extract_backend_option),
+                Styles.SUCCESS,
+            )
+        )
     if updates.provider_set and set_provider_option is not None:
         console.print(
             _styled(Messages.INFO_PROVIDER_SET.format(value=set_provider_option), Styles.SUCCESS)
@@ -1139,6 +1190,8 @@ def config(
         model=resolve_default_model(provider, cfg.model),
         batch=cfg.batch_size if cfg.batch_size is not None else DEFAULT_BATCH_SIZE,
         concurrency=cfg.embed_concurrency,
+        extract_concurrency=cfg.extract_concurrency,
+        extract_backend=cfg.extract_backend,
         auto_index="yes" if cfg.auto_index else "no",
         rerank=rerank,
         flashrank_line=flashrank_line,
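In practice, the cli.py changes above add two switches to the existing config command: --set-extract-concurrency, which is rejected unless the value is at least 1, and --set-extract-backend, which is lower-cased and checked against SUPPORTED_EXTRACT_BACKENDS before being stored; both settings are echoed back on success and included in the config display. Assuming the usual typer wiring of this CLI, that presumably looks something like `vexor config --set-extract-concurrency 8 --set-extract-backend process` (the exact invocation is inferred from the function names, not shown in this diff).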
vexor/config.py
CHANGED
@@ -19,13 +19,16 @@ DEFAULT_MODEL = "text-embedding-3-small"
 DEFAULT_GEMINI_MODEL = "gemini-embedding-001"
 DEFAULT_LOCAL_MODEL = "intfloat/multilingual-e5-small"
 DEFAULT_BATCH_SIZE = 64
-DEFAULT_EMBED_CONCURRENCY =
+DEFAULT_EMBED_CONCURRENCY = 4
+DEFAULT_EXTRACT_CONCURRENCY = max(1, min(4, os.cpu_count() or 1))
+DEFAULT_EXTRACT_BACKEND = "auto"
 DEFAULT_PROVIDER = "openai"
 DEFAULT_RERANK = "off"
 DEFAULT_FLASHRANK_MODEL = "ms-marco-TinyBERT-L-2-v2"
 DEFAULT_FLASHRANK_MAX_LENGTH = 256
 SUPPORTED_PROVIDERS: tuple[str, ...] = (DEFAULT_PROVIDER, "gemini", "custom", "local")
 SUPPORTED_RERANKERS: tuple[str, ...] = ("off", "bm25", "flashrank", "remote")
+SUPPORTED_EXTRACT_BACKENDS: tuple[str, ...] = ("auto", "thread", "process")
 ENV_API_KEY = "VEXOR_API_KEY"
 REMOTE_RERANK_ENV = "VEXOR_REMOTE_RERANK_API_KEY"
 LEGACY_GEMINI_ENV = "GOOGLE_GENAI_API_KEY"
@@ -45,6 +48,8 @@ class Config:
     model: str = DEFAULT_MODEL
     batch_size: int = DEFAULT_BATCH_SIZE
     embed_concurrency: int = DEFAULT_EMBED_CONCURRENCY
+    extract_concurrency: int = DEFAULT_EXTRACT_CONCURRENCY
+    extract_backend: str = DEFAULT_EXTRACT_BACKEND
     provider: str = DEFAULT_PROVIDER
     base_url: str | None = None
     auto_index: bool = True
@@ -81,6 +86,10 @@ def load_config() -> Config:
         model=raw.get("model") or DEFAULT_MODEL,
         batch_size=int(raw.get("batch_size", DEFAULT_BATCH_SIZE)),
         embed_concurrency=int(raw.get("embed_concurrency", DEFAULT_EMBED_CONCURRENCY)),
+        extract_concurrency=int(
+            raw.get("extract_concurrency", DEFAULT_EXTRACT_CONCURRENCY)
+        ),
+        extract_backend=_coerce_extract_backend(raw.get("extract_backend")),
         provider=raw.get("provider") or DEFAULT_PROVIDER,
         base_url=raw.get("base_url") or None,
         auto_index=bool(raw.get("auto_index", True)),
@@ -100,6 +109,8 @@ def save_config(config: Config) -> None:
     data["model"] = config.model
     data["batch_size"] = config.batch_size
     data["embed_concurrency"] = config.embed_concurrency
+    data["extract_concurrency"] = config.extract_concurrency
+    data["extract_backend"] = config.extract_backend
     if config.provider:
         data["provider"] = config.provider
     if config.base_url:
@@ -189,6 +200,18 @@ def set_embed_concurrency(value: int) -> None:
     save_config(config)
 
 
+def set_extract_concurrency(value: int) -> None:
+    config = load_config()
+    config.extract_concurrency = value
+    save_config(config)
+
+
+def set_extract_backend(value: str) -> None:
+    config = load_config()
+    config.extract_backend = _normalize_extract_backend(value)
+    save_config(config)
+
+
 def set_provider(value: str) -> None:
     config = load_config()
     config.provider = value
@@ -341,6 +364,8 @@ def _clone_config(config: Config) -> Config:
         model=config.model,
         batch_size=config.batch_size,
         embed_concurrency=config.embed_concurrency,
+        extract_concurrency=config.extract_concurrency,
+        extract_backend=config.extract_backend,
         provider=config.provider,
         base_url=config.base_url,
         auto_index=config.auto_index,
@@ -374,6 +399,14 @@ def _apply_config_payload(config: Config, payload: Mapping[str, object]) -> None
             "embed_concurrency",
             DEFAULT_EMBED_CONCURRENCY,
         )
+    if "extract_concurrency" in payload:
+        config.extract_concurrency = _coerce_int(
+            payload["extract_concurrency"],
+            "extract_concurrency",
+            DEFAULT_EXTRACT_CONCURRENCY,
+        )
+    if "extract_backend" in payload:
+        config.extract_backend = _normalize_extract_backend(payload["extract_backend"])
     if "provider" in payload:
         config.provider = _coerce_required_str(
             payload["provider"], "provider", DEFAULT_PROVIDER
@@ -448,6 +481,26 @@ def _coerce_bool(value: object, field: str) -> bool:
     raise ValueError(Messages.ERROR_CONFIG_VALUE_INVALID.format(field=field))
 
 
+def _normalize_extract_backend(value: object) -> str:
+    if value is None:
+        return DEFAULT_EXTRACT_BACKEND
+    if isinstance(value, str):
+        normalized = value.strip().lower() or DEFAULT_EXTRACT_BACKEND
+        if normalized in SUPPORTED_EXTRACT_BACKENDS:
+            return normalized
+    raise ValueError(Messages.ERROR_CONFIG_VALUE_INVALID.format(field="extract_backend"))
+
+
+def _coerce_extract_backend(value: object) -> str:
+    if value is None:
+        return DEFAULT_EXTRACT_BACKEND
+    if isinstance(value, str):
+        normalized = value.strip().lower()
+        if normalized in SUPPORTED_EXTRACT_BACKENDS:
+            return normalized
+    return DEFAULT_EXTRACT_BACKEND
+
+
 def _normalize_rerank(value: object) -> str:
     if value is None:
         normalized = DEFAULT_RERANK
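For orientation, a minimal usage sketch of the new config surface introduced above. The function and field names come from the vexor/config.py diff; the import path and on-disk behavior are assumptions and have not been run against the released wheel. Note that DEFAULT_EXTRACT_CONCURRENCY clamps to the 1-4 range: 4 on an 8-core machine, 2 on a 2-core machine, and 1 when os.cpu_count() returns None.

# Hypothetical sketch based on the vexor/config.py diff above (not verified against 0.21.1).
from vexor import config

config.set_extract_concurrency(8)        # stored as-is; the CLI layer enforces >= 1
config.set_extract_backend(" Process ")  # normalized to "process"; unsupported values raise ValueError

cfg = config.load_config()
print(cfg.extract_concurrency, cfg.extract_backend)  # expected: 8 process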
vexor/providers/gemini.py
CHANGED
(Where a removed line's previous text was not captured in this extract, it appears below as "…"; the same applies to vexor/providers/openai.py.)
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 from concurrent.futures import ThreadPoolExecutor, as_completed
+import time
 from typing import Iterator, Sequence
 
 import numpy as np
@@ -38,14 +39,19 @@ class GeminiEmbeddingBackend:
         if base_url:
             client_kwargs["http_options"] = genai_types.HttpOptions(base_url=base_url)
         self._client = genai.Client(**client_kwargs)
+        self._executor: ThreadPoolExecutor | None = None
 
     def embed(self, texts: Sequence[str]) -> np.ndarray:
         if not texts:
             return np.empty((0, 0), dtype=np.float32)
-        …
+        if self.concurrency > 1:
+            batches = list(_chunk(texts, self.chunk_size))
+            if len(batches) > 1:
+                vectors_by_batch: list[list[np.ndarray] | None] = [None] * len(batches)
+                executor = self._executor
+                if executor is None:
+                    executor = ThreadPoolExecutor(max_workers=self.concurrency)
+                    self._executor = executor
                 future_map = {
                     executor.submit(self._embed_batch, batch): idx
                     for idx, batch in enumerate(batches)
@@ -53,23 +59,34 @@ class GeminiEmbeddingBackend:
                 for future in as_completed(future_map):
                     idx = future_map[future]
                     vectors_by_batch[idx] = future.result()
-                …
+                vectors = [vec for batch in vectors_by_batch if batch for vec in batch]
+            else:
+                vectors = []
+                for batch in batches:
+                    vectors.extend(self._embed_batch(batch))
         else:
             vectors = []
-            for batch in
+            for batch in _chunk(texts, self.chunk_size):
                 vectors.extend(self._embed_batch(batch))
         if not vectors:
             raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
         return np.vstack(vectors)
 
     def _embed_batch(self, batch: Sequence[str]) -> list[np.ndarray]:
-        …
+        attempt = 0
+        while True:
+            try:
+                response = self._client.models.embed_content(
+                    model=self.model_name,
+                    contents=list(batch),
+                )
+                break
+            except genai_errors.ClientError as exc:
+                if _should_retry_genai_error(exc) and attempt < _MAX_RETRIES:
+                    _sleep(_backoff_delay(attempt))
+                    attempt += 1
+                    continue
+                raise RuntimeError(_format_genai_error(exc)) from exc
         embeddings = getattr(response, "embeddings", None)
         if not embeddings:
             raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
@@ -90,6 +107,55 @@ def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
         yield items[idx : idx + size]
 
 
+_RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}
+_MAX_RETRIES = 2
+_RETRY_BASE_DELAY = 0.5
+_RETRY_MAX_DELAY = 4.0
+
+
+def _sleep(seconds: float) -> None:
+    time.sleep(seconds)
+
+
+def _backoff_delay(attempt: int) -> float:
+    return min(_RETRY_MAX_DELAY, _RETRY_BASE_DELAY * (2**attempt))
+
+
+def _extract_status_code(exc: Exception) -> int | None:
+    for attr in ("status_code", "status", "http_status"):
+        value = getattr(exc, attr, None)
+        if isinstance(value, int):
+            return value
+    response = getattr(exc, "response", None)
+    if response is not None:
+        value = getattr(response, "status_code", None)
+        if isinstance(value, int):
+            return value
+    return None
+
+
+def _should_retry_genai_error(exc: Exception) -> bool:
+    status = _extract_status_code(exc)
+    if status in _RETRYABLE_STATUS_CODES:
+        return True
+    name = exc.__class__.__name__.lower()
+    if "ratelimit" in name or "timeout" in name or "temporarily" in name:
+        return True
+    message = str(exc).lower()
+    return any(
+        token in message
+        for token in (
+            "rate limit",
+            "timeout",
+            "temporar",
+            "overload",
+            "try again",
+            "too many requests",
+            "service unavailable",
+        )
+    )
+
+
 def _format_genai_error(exc: genai_errors.ClientError) -> str:
     message = getattr(exc, "message", None) or str(exc)
     if "API key" in message:
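The retry constants added above imply a short, bounded schedule. A self-contained re-statement of the backoff helper for illustration (copied from the diff; the real helpers live in vexor/providers/gemini.py and openai.py):

# Re-stated from the diff for illustration only.
_RETRY_BASE_DELAY = 0.5
_RETRY_MAX_DELAY = 4.0
_MAX_RETRIES = 2

def _backoff_delay(attempt: int) -> float:
    return min(_RETRY_MAX_DELAY, _RETRY_BASE_DELAY * (2**attempt))

# A failing batch is attempted at most _MAX_RETRIES + 1 = 3 times, sleeping
# _backoff_delay(0) = 0.5 s and _backoff_delay(1) = 1.0 s between attempts;
# the 4.0 s cap only matters from the third retry onward, which this schedule never reaches.
print([_backoff_delay(a) for a in range(5)])  # [0.5, 1.0, 2.0, 4.0, 4.0]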
vexor/providers/openai.py
CHANGED
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 from concurrent.futures import ThreadPoolExecutor, as_completed
+import time
 from typing import Iterator, Sequence
 
 import numpy as np
@@ -35,14 +36,19 @@ class OpenAIEmbeddingBackend:
         if base_url:
             client_kwargs["base_url"] = base_url.rstrip("/")
         self._client = OpenAI(**client_kwargs)
+        self._executor: ThreadPoolExecutor | None = None
 
     def embed(self, texts: Sequence[str]) -> np.ndarray:
         if not texts:
             return np.empty((0, 0), dtype=np.float32)
-        …
+        if self.concurrency > 1:
+            batches = list(_chunk(texts, self.chunk_size))
+            if len(batches) > 1:
+                vectors_by_batch: list[list[np.ndarray] | None] = [None] * len(batches)
+                executor = self._executor
+                if executor is None:
+                    executor = ThreadPoolExecutor(max_workers=self.concurrency)
+                    self._executor = executor
                 future_map = {
                     executor.submit(self._embed_batch, batch): idx
                     for idx, batch in enumerate(batches)
@@ -50,23 +56,34 @@ class OpenAIEmbeddingBackend:
                 for future in as_completed(future_map):
                     idx = future_map[future]
                     vectors_by_batch[idx] = future.result()
-                …
+                vectors = [vec for batch in vectors_by_batch if batch for vec in batch]
+            else:
+                vectors = []
+                for batch in batches:
+                    vectors.extend(self._embed_batch(batch))
         else:
             vectors = []
-            for batch in
+            for batch in _chunk(texts, self.chunk_size):
                 vectors.extend(self._embed_batch(batch))
         if not vectors:
             raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
         return np.vstack(vectors)
 
     def _embed_batch(self, batch: Sequence[str]) -> list[np.ndarray]:
-        …
+        attempt = 0
+        while True:
+            try:
+                response = self._client.embeddings.create(
+                    model=self.model_name,
+                    input=list(batch),
+                )
+                break
+            except Exception as exc:  # pragma: no cover - API client variations
+                if _should_retry_openai_error(exc) and attempt < _MAX_RETRIES:
+                    _sleep(_backoff_delay(attempt))
+                    attempt += 1
+                    continue
+                raise RuntimeError(_format_openai_error(exc)) from exc
         data = getattr(response, "data", None) or []
         if not data:
             raise RuntimeError(Messages.ERROR_NO_EMBEDDINGS)
@@ -87,6 +104,55 @@ def _chunk(items: Sequence[str], size: int | None) -> Iterator[Sequence[str]]:
         yield items[idx : idx + size]
 
 
+_RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}
+_MAX_RETRIES = 2
+_RETRY_BASE_DELAY = 0.5
+_RETRY_MAX_DELAY = 4.0
+
+
+def _sleep(seconds: float) -> None:
+    time.sleep(seconds)
+
+
+def _backoff_delay(attempt: int) -> float:
+    return min(_RETRY_MAX_DELAY, _RETRY_BASE_DELAY * (2**attempt))
+
+
+def _extract_status_code(exc: Exception) -> int | None:
+    for attr in ("status_code", "status", "http_status"):
+        value = getattr(exc, attr, None)
+        if isinstance(value, int):
+            return value
+    response = getattr(exc, "response", None)
+    if response is not None:
+        value = getattr(response, "status_code", None)
+        if isinstance(value, int):
+            return value
+    return None
+
+
+def _should_retry_openai_error(exc: Exception) -> bool:
+    status = _extract_status_code(exc)
+    if status in _RETRYABLE_STATUS_CODES:
+        return True
+    name = exc.__class__.__name__.lower()
+    if "ratelimit" in name or "timeout" in name or "temporarily" in name:
+        return True
+    message = str(exc).lower()
+    return any(
+        token in message
+        for token in (
+            "rate limit",
+            "timeout",
+            "temporar",
+            "overload",
+            "try again",
+            "too many requests",
+            "service unavailable",
+        )
+    )
+
+
 def _format_openai_error(exc: Exception) -> str:
     message = getattr(exc, "message", None) or str(exc)
     return f"{Messages.ERROR_OPENAI_PREFIX}{message}"
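Both providers use the same ThreadPoolExecutor pattern for concurrent batches: futures are mapped back to their batch index, so results are reassembled in input order even though as_completed() yields them in completion order. A simplified, self-contained illustration of that pattern follows (toy names; the real code also caches the executor on self._executor instead of creating one per call):

# Toy illustration of the order-preserving pattern used in embed() above.
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

def fake_embed_batch(batch: list[str]) -> list[str]:  # stand-in for _embed_batch
    time.sleep(0.01 * (len(batch) % 3))                # deliberately finish out of order
    return [f"vec({item})" for item in batch]

batches = [["a", "b"], ["c"], ["d", "e", "f"]]
results: list[list[str] | None] = [None] * len(batches)
with ThreadPoolExecutor(max_workers=4) as executor:
    future_map = {executor.submit(fake_embed_batch, b): i for i, b in enumerate(batches)}
    for future in as_completed(future_map):
        results[future_map[future]] = future.result()  # slot result by original batch index

flat = [vec for batch in results if batch for vec in batch]
print(flat)  # vectors for a, b, c, d, e, f, in the original input order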
vexor/services/config_service.py
CHANGED
@@ -11,6 +11,8 @@ from ..config import (
     set_base_url,
     set_batch_size,
     set_embed_concurrency,
+    set_extract_concurrency,
+    set_extract_backend,
     set_auto_index,
     set_flashrank_model,
     set_local_cuda,
@@ -28,6 +30,8 @@ class ConfigUpdateResult:
     model_set: bool = False
     batch_size_set: bool = False
     embed_concurrency_set: bool = False
+    extract_concurrency_set: bool = False
+    extract_backend_set: bool = False
     provider_set: bool = False
     base_url_set: bool = False
     base_url_cleared: bool = False
@@ -49,6 +53,8 @@ class ConfigUpdateResult:
                 self.model_set,
                 self.batch_size_set,
                 self.embed_concurrency_set,
+                self.extract_concurrency_set,
+                self.extract_backend_set,
                 self.provider_set,
                 self.base_url_set,
                 self.base_url_cleared,
@@ -71,6 +77,8 @@ def apply_config_updates(
     model: str | None = None,
     batch_size: int | None = None,
     embed_concurrency: int | None = None,
+    extract_concurrency: int | None = None,
+    extract_backend: str | None = None,
     provider: str | None = None,
     base_url: str | None = None,
     clear_base_url: bool = False,
@@ -101,6 +109,12 @@ def apply_config_updates(
     if embed_concurrency is not None:
         set_embed_concurrency(embed_concurrency)
         result.embed_concurrency_set = True
+    if extract_concurrency is not None:
+        set_extract_concurrency(extract_concurrency)
+        result.extract_concurrency_set = True
+    if extract_backend is not None:
+        set_extract_backend(extract_backend)
+        result.extract_backend_set = True
     if provider is not None:
         set_provider(provider)
         result.provider_set = True