symbolicai 0.21.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- symai/__init__.py +269 -173
- symai/backend/base.py +123 -110
- symai/backend/engines/drawing/engine_bfl.py +45 -44
- symai/backend/engines/drawing/engine_gpt_image.py +112 -97
- symai/backend/engines/embedding/engine_llama_cpp.py +63 -52
- symai/backend/engines/embedding/engine_openai.py +25 -21
- symai/backend/engines/execute/engine_python.py +19 -18
- symai/backend/engines/files/engine_io.py +104 -95
- symai/backend/engines/imagecaptioning/engine_blip2.py +28 -24
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +102 -79
- symai/backend/engines/index/engine_pinecone.py +124 -97
- symai/backend/engines/index/engine_qdrant.py +1011 -0
- symai/backend/engines/index/engine_vectordb.py +84 -56
- symai/backend/engines/lean/engine_lean4.py +96 -52
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +330 -248
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +329 -264
- symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +118 -88
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +344 -299
- symai/backend/engines/neurosymbolic/engine_groq.py +173 -115
- symai/backend/engines/neurosymbolic/engine_huggingface.py +114 -84
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +144 -118
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +415 -307
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +394 -231
- symai/backend/engines/ocr/engine_apilayer.py +23 -27
- symai/backend/engines/output/engine_stdout.py +10 -13
- symai/backend/engines/{webscraping → scrape}/engine_requests.py +101 -54
- symai/backend/engines/search/engine_openai.py +100 -88
- symai/backend/engines/search/engine_parallel.py +665 -0
- symai/backend/engines/search/engine_perplexity.py +44 -45
- symai/backend/engines/search/engine_serpapi.py +37 -34
- symai/backend/engines/speech_to_text/engine_local_whisper.py +54 -51
- symai/backend/engines/symbolic/engine_wolframalpha.py +15 -9
- symai/backend/engines/text_to_speech/engine_openai.py +20 -26
- symai/backend/engines/text_vision/engine_clip.py +39 -37
- symai/backend/engines/userinput/engine_console.py +5 -6
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +48 -38
- symai/backend/mixin/deepseek.py +6 -5
- symai/backend/mixin/google.py +7 -4
- symai/backend/mixin/groq.py +2 -4
- symai/backend/mixin/openai.py +140 -110
- symai/backend/settings.py +87 -20
- symai/chat.py +216 -123
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +80 -70
- symai/collect/pipeline.py +67 -51
- symai/collect/stats.py +161 -109
- symai/components.py +707 -360
- symai/constraints.py +24 -12
- symai/core.py +1857 -1233
- symai/core_ext.py +83 -80
- symai/endpoints/api.py +166 -104
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +29 -21
- symai/extended/arxiv_pdf_parser.py +23 -14
- symai/extended/bibtex_parser.py +9 -6
- symai/extended/conversation.py +156 -126
- symai/extended/document.py +50 -30
- symai/extended/file_merger.py +57 -14
- symai/extended/graph.py +51 -32
- symai/extended/html_style_template.py +18 -14
- symai/extended/interfaces/blip_2.py +2 -3
- symai/extended/interfaces/clip.py +4 -3
- symai/extended/interfaces/console.py +9 -1
- symai/extended/interfaces/dall_e.py +4 -2
- symai/extended/interfaces/file.py +2 -0
- symai/extended/interfaces/flux.py +4 -2
- symai/extended/interfaces/gpt_image.py +16 -7
- symai/extended/interfaces/input.py +2 -1
- symai/extended/interfaces/llava.py +1 -2
- symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +4 -3
- symai/extended/interfaces/naive_vectordb.py +9 -10
- symai/extended/interfaces/ocr.py +5 -3
- symai/extended/interfaces/openai_search.py +2 -0
- symai/extended/interfaces/parallel.py +30 -0
- symai/extended/interfaces/perplexity.py +2 -0
- symai/extended/interfaces/pinecone.py +12 -9
- symai/extended/interfaces/python.py +2 -0
- symai/extended/interfaces/serpapi.py +3 -1
- symai/extended/interfaces/terminal.py +2 -4
- symai/extended/interfaces/tts.py +3 -2
- symai/extended/interfaces/whisper.py +3 -2
- symai/extended/interfaces/wolframalpha.py +2 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +14 -13
- symai/extended/os_command.py +39 -29
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +51 -43
- symai/extended/packages/sympkg.py +41 -35
- symai/extended/packages/symrun.py +63 -50
- symai/extended/repo_cloner.py +14 -12
- symai/extended/seo_query_optimizer.py +15 -13
- symai/extended/solver.py +116 -91
- symai/extended/summarizer.py +12 -10
- symai/extended/taypan_interpreter.py +17 -18
- symai/extended/vectordb.py +122 -92
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +51 -47
- symai/formatter/regex.py +70 -69
- symai/functional.py +325 -176
- symai/imports.py +190 -147
- symai/interfaces.py +57 -28
- symai/memory.py +45 -35
- symai/menu/screen.py +28 -19
- symai/misc/console.py +66 -56
- symai/misc/loader.py +8 -5
- symai/models/__init__.py +17 -1
- symai/models/base.py +395 -236
- symai/models/errors.py +1 -2
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +24 -25
- symai/ops/primitives.py +1149 -731
- symai/post_processors.py +58 -50
- symai/pre_processors.py +86 -82
- symai/processor.py +21 -13
- symai/prompts.py +764 -685
- symai/server/huggingface_server.py +135 -49
- symai/server/llama_cpp_server.py +21 -11
- symai/server/qdrant_server.py +206 -0
- symai/shell.py +100 -42
- symai/shellsv.py +700 -492
- symai/strategy.py +630 -346
- symai/symbol.py +368 -322
- symai/utils.py +100 -78
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +22 -10
- symbolicai-1.1.0.dist-info/RECORD +168 -0
- symbolicai-0.21.0.dist-info/RECORD +0 -162
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.21.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
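The listing above comes from the registry's diff service. To reproduce the file-level comparison locally, a minimal sketch (assuming both wheels have been downloaded into the working directory; .whl files are plain zip archives):

import zipfile

# A wheel is a zip archive, so comparing member lists yields added/removed files.
old = set(zipfile.ZipFile("symbolicai-0.21.0-py3-none-any.whl").namelist())
new = set(zipfile.ZipFile("symbolicai-1.1.0-py3-none-any.whl").namelist())
print("added:  ", sorted(new - old))
print("removed:", sorted(old - new))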
symai/server/huggingface_server.py
CHANGED

@@ -1,39 +1,112 @@
 import argparse
 import random
-from typing import List, Optional
 
 import numpy as np
 import torch
-from fastapi import FastAPI
+from fastapi import FastAPI
 from pydantic import BaseModel
-from transformers import (
-
-
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    StoppingCriteria,
+    StoppingCriteriaList,
+)
 
 # General arguments
 parser = argparse.ArgumentParser(description="FastAPI server for Hugging Face models")
 parser.add_argument("--model", type=str, help="Path to the model")
-parser.add_argument(
+parser.add_argument(
+    "--host", type=str, default="localhost", help="Host address. (default: localhost)"
+)
 parser.add_argument("--port", type=int, default=8000, help="Port number. (default: 8000)")
 
 # Quantization arguments with 'quant_' prefix
-parser.add_argument(
-
-
-
-
-
-parser.add_argument(
-
-
-
-
+parser.add_argument(
+    "--quant",
+    action="store_true",
+    default=False,
+    help="Enable quantization; see help for available quantization options (default: False)",
+)
+parser.add_argument(
+    "--quant_load_in_8bit",
+    action="store_true",
+    default=False,
+    help="Load model in 8-bit precision (default: False)",
+)
+parser.add_argument(
+    "--quant_load_in_4bit",
+    action="store_true",
+    default=False,
+    help="Load model in 4-bit precision (default: False)",
+)
+parser.add_argument(
+    "--quant_llm_int8_threshold", type=float, default=6.0, help="LLM int8 threshold (default: 6.0)"
+)
+parser.add_argument(
+    "--quant_llm_int8_skip_modules",
+    type=str,
+    nargs="+",
+    default=None,
+    help="LLM int8 skip modules (default: None)",
+)
+parser.add_argument(
+    "--quant_llm_int8_enable_fp32_cpu_offload",
+    action="store_true",
+    default=False,
+    help="Enable FP32 CPU offload for LLM int8 (default: False)",
+)
+parser.add_argument(
+    "--quant_llm_int8_has_fp16_weight",
+    action="store_true",
+    default=False,
+    help="LLM int8 has FP16 weight (default: False)",
+)
+parser.add_argument(
+    "--quant_bnb_4bit_compute_dtype",
+    type=str,
+    default=None,
+    help="BNB 4-bit compute dtype (default: None)",
+)
+parser.add_argument(
+    "--quant_bnb_4bit_quant_type",
+    type=str,
+    default="fp4",
+    help="BNB 4-bit quantization type (default: fp4)",
+)
+parser.add_argument(
+    "--quant_bnb_4bit_use_double_quant",
+    action="store_true",
+    default=False,
+    help="Use double quantization for BNB 4-bit (default: False)",
+)
+parser.add_argument(
+    "--quant_bnb_4bit_quant_storage",
+    type=str,
+    default=None,
+    help="BNB 4-bit quantization storage (default: None)",
+)
 
 # Model inference arguments
 # https://huggingface.co/docs/transformers/main/en/main_classes/model
-parser.add_argument(
-
-
+parser.add_argument(
+    "--torch_dtype",
+    type=str,
+    default="auto",
+    help="A string that is a valid torch.dtype. E.g. “float32” loads the model in torch.float32, “float16” loads in torch.float16 etc. (default: auto)",
+)
+parser.add_argument(
+    "--attn_implementation",
+    type=str,
+    default="sdpa",
+    help='The attention implementation to use in the model (if relevant). Can be any of "eager" (manual implementation of the attention), "sdpa" (using F.scaled_dot_product_attention), or "flash_attention_2" (using Dao-AILab/flash-attention). By default, if available, SDPA will be used for torch>=2.1.1. (default: sdpa)',
+)
+parser.add_argument(
+    "--device_map",
+    type=str,
+    default="auto",
+    help="A string that is a valid device. E.g. “cuda” loads the model on the GPU, “cpu” loads it on the CPU. (default: auto)",
+)
 
 args = parser.parse_args()
 
@@ -47,7 +120,7 @@ quant_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=args.quant_bnb_4bit_compute_dtype,
     bnb_4bit_quant_type=args.quant_bnb_4bit_quant_type,
     bnb_4bit_use_double_quant=args.quant_bnb_4bit_use_double_quant,
-    bnb_4bit_quant_storage=args.quant_bnb_4bit_quant_storage
+    bnb_4bit_quant_storage=args.quant_bnb_4bit_quant_storage,
 )
 
 app = FastAPI()
@@ -70,6 +143,7 @@ else:
         attn_implementation=args.attn_implementation,
     )
 
+
 def set_seed(seed):
     random.seed(seed)
     np.random.seed(seed)
@@ -77,6 +151,7 @@ def set_seed(seed):
     if torch.cuda.is_available():
         torch.cuda.manual_seed_all(seed)
 
+
 class StoppingCriteriaSub(StoppingCriteria):
     # https://discuss.huggingface.co/t/implimentation-of-stopping-criteria-list/20040/13
     def __init__(self, stop_words, tokenizer):
@@ -84,39 +159,48 @@ class StoppingCriteriaSub(StoppingCriteria):
         self.stop_words = stop_words
         self.tokenizer = tokenizer
 
-    def __call__(self, input_ids: torch.LongTensor,
+    def __call__(self, input_ids: torch.LongTensor, _scores: torch.FloatTensor, **_kwargs) -> bool:
         for stop_word in self.stop_words:
-            if
+            if (
+                self.tokenizer.decode(
+                    input_ids[0][-len(self.tokenizer.encode(stop_word)) :]
+                ).strip()
+                == stop_word
+            ):
                 return True
         return False
 
+
 class TokenizeRequest(BaseModel):
     input: str
-    add_special_tokens:
+    add_special_tokens: bool | None = False
+
 
 class DetokenizeRequest(BaseModel):
-    tokens:
-    skip_special_tokens:
+    tokens: list[int]
+    skip_special_tokens: bool | None = True
+
 
 class ChatCompletionRequest(BaseModel):
-    messages:
-    temperature: float = 1.
-    top_p: float = 1.
-    stop:
-    seed:
-    max_tokens:
-    max_tokens_forcing:
+    messages: list[dict]
+    temperature: float = 1.0
+    top_p: float = 1.0
+    stop: list[str] | None = None
+    seed: int | None = None
+    max_tokens: int | None = 2048
+    max_tokens_forcing: int | None = None
     top_k: int = 50
     logprobs: bool = False
     do_sample: bool = True
     num_beams: int = 1
     num_beam_groups: int = 1
-    eos_token_id:
+    eos_token_id: int | None = None
+
 
 @app.post("/chat")
 def chat_completions(request: ChatCompletionRequest):
     chat = request.messages
-
+    # @TODO: is there a way to assert that the loaded model has chat capabilities?
     inputs = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(inputs, return_tensors="pt").to(model.device)
 
@@ -129,30 +213,29 @@ def chat_completions(request: ChatCompletionRequest):
         "do_sample": request.do_sample,
         "num_beams": request.num_beams,
         "num_beam_groups": request.num_beam_groups,
-        "eos_token_id": request.eos_token_id
+        "eos_token_id": request.eos_token_id
+        if request.eos_token_id is not None
+        else tokenizer.eos_token_id,
         "output_logits": request.logprobs,
         "return_dict_in_generate": True,
     }
 
     if request.stop:
-        generation_config["stopping_criteria"] = StoppingCriteriaList(
+        generation_config["stopping_criteria"] = StoppingCriteriaList(
+            [StoppingCriteriaSub(stop_words=request.stop, tokenizer=tokenizer)]
+        )
 
     if request.seed:
        set_seed(request.seed)
 
     outputs = model.generate(**inputs, **generation_config)
 
-    new_tokens = outputs.sequences[0][inputs.input_ids.shape[-1]:]
+    new_tokens = outputs.sequences[0][inputs.input_ids.shape[-1] :]
     generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
 
-
+    return {
         "choices": [
-            {
-                "message": {
-                    "role": "assistant",
-                    "content": generated_text
-                }
-            },
+            {"message": {"role": "assistant", "content": generated_text}},
         ],
         "metadata": {
             "model": args.model,
@@ -172,23 +255,26 @@ def chat_completions(request: ChatCompletionRequest):
             "logits": [logits.tolist() for logits in outputs.logits],
             "model_input": chat,
             "model_chat_format": tokenizer.chat_template,
-        }
+        },
     }
 
-    return response
 
 @app.post("/tokenize")
 def tokenize(request: TokenizeRequest):
     tokens = tokenizer.encode(request.input, add_special_tokens=request.add_special_tokens)
     return {"tokens": tokens}
 
+
 @app.post("/detokenize")
 def detokenize(request: DetokenizeRequest):
     text = tokenizer.decode(request.tokens, skip_special_tokens=request.skip_special_tokens)
     return {"text": text}
 
+
 def huggingface_server():
-    import
-    from functools import partial
+    # Lazy imports keep optional server dependencies out of module import path.
+    from functools import partial  # noqa
+    import uvicorn  # noqa
+
     command = partial(uvicorn.run, app)
     return command, args
symai/server/llama_cpp_server.py
CHANGED

@@ -1,45 +1,55 @@
 import argparse
 import subprocess
 import sys
-import
+from pathlib import Path
+
 from loguru import logger
 
+
 def llama_cpp_server():
     parser = argparse.ArgumentParser(description="A wrapper for llama_cpp.", add_help=False)
-    parser.add_argument(
-
-
+    parser.add_argument(
+        "--help", action="store_true", help="Show available options for llama_cpp server."
+    )
+    parser.add_argument(
+        "--env",
+        choices=["python", "cpp"],
+        default="python",
+        help="Choose programming environment (python or cpp)",
+    )
     parser.add_argument("--cpp-server-path", type=str, help="Path to llama.cpp server executable")
 
     main_args, llama_cpp_args = parser.parse_known_args()
 
     if main_args.help:
-        if main_args.env ==
+        if main_args.env == "python":
             command = [sys.executable, "-m", "llama_cpp.server", "--help"]
-            subprocess.run(command)
+            subprocess.run(command, check=False)
         else:
             if not main_args.cpp_server_path:
                 logger.error("Error: --cpp-server-path is required when using cpp environment")
                 sys.exit(1)
-            if not
+            if not Path(main_args.cpp_server_path).exists():
                 logger.error(f"Error: Executable not found at {main_args.cpp_server_path}")
                 sys.exit(1)
             command = [main_args.cpp_server_path, "--help"]
-            subprocess.run(command)
+            subprocess.run(command, check=False)
         sys.exit(0)
 
-    if main_args.env ==
+    if main_args.env == "cpp":
         if not main_args.cpp_server_path:
             logger.error("Error: --cpp-server-path is required when using cpp environment")
             sys.exit(1)
-        if not
+        if not Path(main_args.cpp_server_path).exists():
             logger.error(f"Error: Executable not found at {main_args.cpp_server_path}")
             sys.exit(1)
         command = [
             main_args.cpp_server_path,
             *llama_cpp_args,
         ]
-        llama_cpp_args = [
+        llama_cpp_args = [
+            arg for arg in llama_cpp_args if not arg.startswith("--embedding")
+        ]  # Exclude embedding argument
     else:  # python
         command = [
             sys.executable,
symai/server/qdrant_server.py
ADDED

@@ -0,0 +1,206 @@
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+
+from loguru import logger
+
+
+def qdrant_server():  # noqa
+    """
+    A wrapper for Qdrant server that supports both Docker and binary execution modes.
+
+    Returns:
+        tuple: (command, args) where command is the list to execute and args are the parsed arguments
+    """
+    parser = argparse.ArgumentParser(description="A wrapper for Qdrant server.", add_help=False)
+    parser.add_argument(
+        "--help", action="store_true", help="Show available options for Qdrant server."
+    )
+    parser.add_argument(
+        "--env",
+        choices=["docker", "binary"],
+        default="docker",
+        help="Choose execution environment (docker or binary)",
+    )
+    parser.add_argument("--binary-path", type=str, help="Path to Qdrant binary executable")
+    parser.add_argument(
+        "--docker-image",
+        type=str,
+        default="qdrant/qdrant:latest",
+        help="Docker image to use (default: qdrant/qdrant:latest)",
+    )
+    parser.add_argument(
+        "--host", type=str, default="0.0.0.0", help="Host address to bind to (default: 0.0.0.0)"
+    )
+    parser.add_argument("--port", type=int, default=6333, help="REST API port (default: 6333)")
+    parser.add_argument("--grpc-port", type=int, default=6334, help="gRPC API port (default: 6334)")
+    parser.add_argument(
+        "--storage-path",
+        type=str,
+        default="./qdrant_storage",
+        help="Path to Qdrant storage directory (default: ./qdrant_storage)",
+    )
+    parser.add_argument(
+        "--config-path", type=str, default=None, help="Path to Qdrant configuration file"
+    )
+    parser.add_argument(
+        "--docker-container-name",
+        type=str,
+        default="qdrant",
+        help="Name for Docker container (default: qdrant)",
+    )
+    parser.add_argument(
+        "--docker-remove",
+        action="store_true",
+        default=True,
+        help="Remove container when it stops (default: True)",
+    )
+    parser.add_argument(
+        "--docker-detach",
+        action="store_true",
+        default=False,
+        help="Run Docker container in detached mode (default: False)",
+    )
+
+    main_args, qdrant_args = parser.parse_known_args()
+
+    if main_args.help:
+        if main_args.env == "docker":
+            # Show Docker help
+            command = ["docker", "run", "--rm", main_args.docker_image, "--help"]
+            subprocess.run(command, check=False)
+        else:
+            if not main_args.binary_path:
+                logger.error("Error: --binary-path is required when using binary environment")
+                sys.exit(1)
+            if not Path(main_args.binary_path).exists():
+                logger.error(f"Error: Binary not found at {main_args.binary_path}")
+                sys.exit(1)
+            command = [main_args.binary_path, "--help"]
+            subprocess.run(command, check=False)
+        sys.exit(0)
+
+    if main_args.env == "binary":
+        if not main_args.binary_path:
+            logger.error("Error: --binary-path is required when using binary environment")
+            sys.exit(1)
+        if not Path(main_args.binary_path).exists():
+            logger.error(f"Error: Binary not found at {main_args.binary_path}")
+            sys.exit(1)
+
+        # Build command for binary execution
+        command = [main_args.binary_path]
+
+        # Ensure storage directory exists
+        storage_path = Path(main_args.storage_path)
+        storage_path.mkdir(parents=True, exist_ok=True)
+        abs_storage_path = str(storage_path.resolve())
+
+        # Add standard Qdrant arguments
+        # Set storage path via environment variable or command argument
+        # Qdrant binary accepts --storage-path argument
+        command.extend(["--storage-path", abs_storage_path])
+
+        # Add host, port, and grpc-port arguments
+        command.extend(["--host", main_args.host])
+        command.extend(["--port", str(main_args.port)])
+        command.extend(["--grpc-port", str(main_args.grpc_port)])
+
+        if main_args.config_path:
+            command.extend(["--config-path", main_args.config_path])
+
+        # Add any additional Qdrant-specific arguments
+        command.extend(qdrant_args)
+
+    else:  # docker
+        # Ensure storage directory exists
+        storage_path = Path(main_args.storage_path)
+        storage_path.mkdir(parents=True, exist_ok=True)
+        abs_storage_path = str(storage_path.resolve())
+
+        # Build Docker command
+        command = ["docker", "run"]
+
+        # Container management options
+        if main_args.docker_remove:
+            command.append("--rm")
+
+        if main_args.docker_detach:
+            command.append("-d")
+        # Note: We don't add -it by default to avoid issues in non-interactive environments
+        # Users can add it manually if needed via qdrant_args
+
+        # Container name
+        command.extend(["--name", main_args.docker_container_name])
+
+        # Port mappings
+        command.extend(["-p", f"{main_args.port}:6333"])
+        command.extend(["-p", f"{main_args.grpc_port}:6334"])
+
+        # Volume mount for storage
+        command.extend(["-v", f"{abs_storage_path}:/qdrant/storage:z"])
+
+        # Volume mount for config (if provided)
+        # Note: Qdrant Docker image accepts environment variables and config files
+        # For custom config, mount it as a volume before the image name
+        if main_args.config_path:
+            config_path = Path(main_args.config_path)
+            abs_config_path = config_path.resolve()
+            config_dir = str(abs_config_path.parent)
+            command.extend(["-v", f"{config_dir}:/qdrant/config:z"])
+            # Qdrant looks for config.yaml in /qdrant/config by default
+
+        # Set storage path environment variable to use the mounted volume
+        command.extend(["-e", "QDRANT__STORAGE__STORAGE_PATH=/qdrant/storage"])
+
+        # Docker image
+        command.append(main_args.docker_image)
+
+        # Qdrant server arguments (if any additional ones are passed)
+
+        # Add any additional Qdrant arguments
+        if qdrant_args:
+            command.extend(qdrant_args)
+
+    # Prepare args for config storage (similar to llama_cpp_server pattern)
+    # Extract key-value pairs for configuration
+    config_args = []
+    if main_args.env == "docker":
+        config_args = [
+            "--env",
+            main_args.env,
+            "--host",
+            main_args.host,
+            "--port",
+            str(main_args.port),
+            "--grpc-port",
+            str(main_args.grpc_port),
+            "--storage-path",
+            main_args.storage_path,
+            "--docker-image",
+            main_args.docker_image,
+            "--docker-container-name",
+            main_args.docker_container_name,
+        ]
+        if main_args.config_path:
+            config_args.extend(["--config-path", main_args.config_path])
+    else:
+        config_args = [
+            "--env",
+            main_args.env,
+            "--binary-path",
+            main_args.binary_path,
+            "--host",
+            main_args.host,
+            "--port",
+            str(main_args.port),
+            "--grpc-port",
+            str(main_args.grpc_port),
+            "--storage-path",
+            main_args.storage_path,
+        ]
+        if main_args.config_path:
+            config_args.extend(["--config-path", main_args.config_path])
+
+    return command, config_args
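For orientation, with the defaults above the docker branch assembles roughly the following invocation; the storage mount path shown is illustrative (in practice it is the resolved absolute path of ./qdrant_storage on the host):

# Approximate argv produced by the docker branch with default arguments.
command = [
    "docker", "run", "--rm",
    "--name", "qdrant",
    "-p", "6333:6333",
    "-p", "6334:6334",
    "-v", "/abs/path/to/qdrant_storage:/qdrant/storage:z",
    "-e", "QDRANT__STORAGE__STORAGE_PATH=/qdrant/storage",
    "qdrant/qdrant:latest",
]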