symbolicai 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +198 -134
- symai/backend/base.py +51 -51
- symai/backend/engines/drawing/engine_bfl.py +33 -33
- symai/backend/engines/drawing/engine_gpt_image.py +4 -10
- symai/backend/engines/embedding/engine_llama_cpp.py +50 -35
- symai/backend/engines/embedding/engine_openai.py +22 -16
- symai/backend/engines/execute/engine_python.py +16 -16
- symai/backend/engines/files/engine_io.py +51 -49
- symai/backend/engines/imagecaptioning/engine_blip2.py +27 -23
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +53 -46
- symai/backend/engines/index/engine_pinecone.py +116 -88
- symai/backend/engines/index/engine_qdrant.py +1011 -0
- symai/backend/engines/index/engine_vectordb.py +78 -52
- symai/backend/engines/lean/engine_lean4.py +65 -25
- symai/backend/engines/neurosymbolic/__init__.py +28 -28
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +137 -135
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +145 -152
- symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +75 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +199 -155
- symai/backend/engines/neurosymbolic/engine_groq.py +106 -72
- symai/backend/engines/neurosymbolic/engine_huggingface.py +100 -67
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +121 -93
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +213 -132
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +180 -137
- symai/backend/engines/ocr/engine_apilayer.py +18 -20
- symai/backend/engines/output/engine_stdout.py +9 -9
- symai/backend/engines/{webscraping → scrape}/engine_requests.py +25 -11
- symai/backend/engines/search/engine_openai.py +95 -83
- symai/backend/engines/search/engine_parallel.py +665 -0
- symai/backend/engines/search/engine_perplexity.py +40 -41
- symai/backend/engines/search/engine_serpapi.py +33 -28
- symai/backend/engines/speech_to_text/engine_local_whisper.py +37 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +14 -8
- symai/backend/engines/text_to_speech/engine_openai.py +15 -19
- symai/backend/engines/text_vision/engine_clip.py +34 -28
- symai/backend/engines/userinput/engine_console.py +3 -4
- symai/backend/mixin/anthropic.py +48 -40
- symai/backend/mixin/deepseek.py +4 -5
- symai/backend/mixin/google.py +5 -4
- symai/backend/mixin/groq.py +2 -4
- symai/backend/mixin/openai.py +132 -110
- symai/backend/settings.py +14 -14
- symai/chat.py +164 -94
- symai/collect/dynamic.py +13 -11
- symai/collect/pipeline.py +39 -31
- symai/collect/stats.py +109 -69
- symai/components.py +556 -238
- symai/constraints.py +14 -5
- symai/core.py +1495 -1210
- symai/core_ext.py +55 -50
- symai/endpoints/api.py +113 -58
- symai/extended/api_builder.py +22 -17
- symai/extended/arxiv_pdf_parser.py +13 -5
- symai/extended/bibtex_parser.py +8 -4
- symai/extended/conversation.py +88 -69
- symai/extended/document.py +40 -27
- symai/extended/file_merger.py +45 -7
- symai/extended/graph.py +38 -24
- symai/extended/html_style_template.py +17 -11
- symai/extended/interfaces/blip_2.py +1 -1
- symai/extended/interfaces/clip.py +4 -2
- symai/extended/interfaces/console.py +5 -3
- symai/extended/interfaces/dall_e.py +3 -1
- symai/extended/interfaces/file.py +2 -0
- symai/extended/interfaces/flux.py +3 -1
- symai/extended/interfaces/gpt_image.py +15 -6
- symai/extended/interfaces/input.py +2 -1
- symai/extended/interfaces/llava.py +1 -1
- symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +3 -2
- symai/extended/interfaces/naive_vectordb.py +2 -2
- symai/extended/interfaces/ocr.py +4 -2
- symai/extended/interfaces/openai_search.py +2 -0
- symai/extended/interfaces/parallel.py +30 -0
- symai/extended/interfaces/perplexity.py +2 -0
- symai/extended/interfaces/pinecone.py +6 -4
- symai/extended/interfaces/python.py +2 -0
- symai/extended/interfaces/serpapi.py +2 -0
- symai/extended/interfaces/terminal.py +0 -1
- symai/extended/interfaces/tts.py +2 -1
- symai/extended/interfaces/whisper.py +2 -1
- symai/extended/interfaces/wolframalpha.py +1 -0
- symai/extended/metrics/__init__.py +1 -1
- symai/extended/metrics/similarity.py +5 -2
- symai/extended/os_command.py +31 -22
- symai/extended/packages/symdev.py +39 -34
- symai/extended/packages/sympkg.py +30 -27
- symai/extended/packages/symrun.py +46 -35
- symai/extended/repo_cloner.py +10 -9
- symai/extended/seo_query_optimizer.py +15 -12
- symai/extended/solver.py +104 -76
- symai/extended/summarizer.py +8 -7
- symai/extended/taypan_interpreter.py +10 -9
- symai/extended/vectordb.py +28 -15
- symai/formatter/formatter.py +39 -31
- symai/formatter/regex.py +46 -44
- symai/functional.py +184 -86
- symai/imports.py +85 -51
- symai/interfaces.py +1 -1
- symai/memory.py +33 -24
- symai/menu/screen.py +28 -19
- symai/misc/console.py +27 -27
- symai/misc/loader.py +4 -3
- symai/models/base.py +147 -76
- symai/models/errors.py +1 -1
- symai/ops/__init__.py +1 -1
- symai/ops/measures.py +17 -14
- symai/ops/primitives.py +933 -635
- symai/post_processors.py +28 -24
- symai/pre_processors.py +58 -52
- symai/processor.py +15 -9
- symai/prompts.py +714 -649
- symai/server/huggingface_server.py +115 -32
- symai/server/llama_cpp_server.py +14 -6
- symai/server/qdrant_server.py +206 -0
- symai/shell.py +98 -39
- symai/shellsv.py +307 -223
- symai/strategy.py +135 -81
- symai/symbol.py +276 -225
- symai/utils.py +62 -46
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +19 -9
- symbolicai-1.1.0.dist-info/RECORD +168 -0
- symbolicai-1.0.0.dist-info/RECORD +0 -163
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
symai/server/huggingface_server.py
CHANGED

@@ -16,27 +16,97 @@ from transformers import (
 # General arguments
 parser = argparse.ArgumentParser(description="FastAPI server for Hugging Face models")
 parser.add_argument("--model", type=str, help="Path to the model")
-parser.add_argument(
+parser.add_argument(
+    "--host", type=str, default="localhost", help="Host address. (default: localhost)"
+)
 parser.add_argument("--port", type=int, default=8000, help="Port number. (default: 8000)")
 
 # Quantization arguments with 'quant_' prefix
-parser.add_argument(
-
-
-
-
-
-parser.add_argument(
-
-
-
-
+parser.add_argument(
+    "--quant",
+    action="store_true",
+    default=False,
+    help="Enable quantization; see help for available quantization options (default: False)",
+)
+parser.add_argument(
+    "--quant_load_in_8bit",
+    action="store_true",
+    default=False,
+    help="Load model in 8-bit precision (default: False)",
+)
+parser.add_argument(
+    "--quant_load_in_4bit",
+    action="store_true",
+    default=False,
+    help="Load model in 4-bit precision (default: False)",
+)
+parser.add_argument(
+    "--quant_llm_int8_threshold", type=float, default=6.0, help="LLM int8 threshold (default: 6.0)"
+)
+parser.add_argument(
+    "--quant_llm_int8_skip_modules",
+    type=str,
+    nargs="+",
+    default=None,
+    help="LLM int8 skip modules (default: None)",
+)
+parser.add_argument(
+    "--quant_llm_int8_enable_fp32_cpu_offload",
+    action="store_true",
+    default=False,
+    help="Enable FP32 CPU offload for LLM int8 (default: False)",
+)
+parser.add_argument(
+    "--quant_llm_int8_has_fp16_weight",
+    action="store_true",
+    default=False,
+    help="LLM int8 has FP16 weight (default: False)",
+)
+parser.add_argument(
+    "--quant_bnb_4bit_compute_dtype",
+    type=str,
+    default=None,
+    help="BNB 4-bit compute dtype (default: None)",
+)
+parser.add_argument(
+    "--quant_bnb_4bit_quant_type",
+    type=str,
+    default="fp4",
+    help="BNB 4-bit quantization type (default: fp4)",
+)
+parser.add_argument(
+    "--quant_bnb_4bit_use_double_quant",
+    action="store_true",
+    default=False,
+    help="Use double quantization for BNB 4-bit (default: False)",
+)
+parser.add_argument(
+    "--quant_bnb_4bit_quant_storage",
+    type=str,
+    default=None,
+    help="BNB 4-bit quantization storage (default: None)",
+)
 
 # Model inference arguments
 # https://huggingface.co/docs/transformers/main/en/main_classes/model
-parser.add_argument(
-
-
+parser.add_argument(
+    "--torch_dtype",
+    type=str,
+    default="auto",
+    help="A string that is a valid torch.dtype. E.g. “float32” loads the model in torch.float32, “float16” loads in torch.float16 etc. (default: auto)",
+)
+parser.add_argument(
+    "--attn_implementation",
+    type=str,
+    default="sdpa",
+    help='The attention implementation to use in the model (if relevant). Can be any of "eager" (manual implementation of the attention), "sdpa" (using F.scaled_dot_product_attention), or "flash_attention_2" (using Dao-AILab/flash-attention). By default, if available, SDPA will be used for torch>=2.1.1. (default: sdpa)',
+)
+parser.add_argument(
+    "--device_map",
+    type=str,
+    default="auto",
+    help="A string that is a valid device. E.g. “cuda” loads the model on the GPU, “cpu” loads it on the CPU. (default: auto)",
+)
 
 args = parser.parse_args()
 
@@ -50,7 +120,7 @@ quant_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=args.quant_bnb_4bit_compute_dtype,
     bnb_4bit_quant_type=args.quant_bnb_4bit_quant_type,
     bnb_4bit_use_double_quant=args.quant_bnb_4bit_use_double_quant,
-    bnb_4bit_quant_storage=args.quant_bnb_4bit_quant_storage
+    bnb_4bit_quant_storage=args.quant_bnb_4bit_quant_storage,
 )
 
 app = FastAPI()
@@ -73,6 +143,7 @@ else:
         attn_implementation=args.attn_implementation,
     )
 
+
 def set_seed(seed):
     random.seed(seed)
     np.random.seed(seed)
@@ -80,6 +151,7 @@ def set_seed(seed):
     if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
 
+
 class StoppingCriteriaSub(StoppingCriteria):
     # https://discuss.huggingface.co/t/implimentation-of-stopping-criteria-list/20040/13
     def __init__(self, stop_words, tokenizer):
@@ -89,22 +161,30 @@ class StoppingCriteriaSub(StoppingCriteria):
 
     def __call__(self, input_ids: torch.LongTensor, _scores: torch.FloatTensor, **_kwargs) -> bool:
         for stop_word in self.stop_words:
-            if
+            if (
+                self.tokenizer.decode(
+                    input_ids[0][-len(self.tokenizer.encode(stop_word)) :]
+                ).strip()
+                == stop_word
+            ):
                 return True
         return False
 
+
 class TokenizeRequest(BaseModel):
     input: str
     add_special_tokens: bool | None = False
 
+
 class DetokenizeRequest(BaseModel):
     tokens: list[int]
     skip_special_tokens: bool | None = True
 
+
 class ChatCompletionRequest(BaseModel):
     messages: list[dict]
-    temperature: float = 1.
-    top_p: float = 1.
+    temperature: float = 1.0
+    top_p: float = 1.0
     stop: list[str] | None = None
     seed: int | None = None
     max_tokens: int | None = 2048
@@ -116,10 +196,11 @@ class ChatCompletionRequest(BaseModel):
     num_beam_groups: int = 1
     eos_token_id: int | None = None
 
+
 @app.post("/chat")
 def chat_completions(request: ChatCompletionRequest):
     chat = request.messages
-
+    # @TODO: is there a way to assert that the loaded model has chat capabilities?
     inputs = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(inputs, return_tensors="pt").to(model.device)
 
@@ -132,30 +213,29 @@ def chat_completions(request: ChatCompletionRequest):
         "do_sample": request.do_sample,
         "num_beams": request.num_beams,
         "num_beam_groups": request.num_beam_groups,
-        "eos_token_id": request.eos_token_id
+        "eos_token_id": request.eos_token_id
+        if request.eos_token_id is not None
+        else tokenizer.eos_token_id,
         "output_logits": request.logprobs,
         "return_dict_in_generate": True,
     }
 
     if request.stop:
-        generation_config["stopping_criteria"] = StoppingCriteriaList(
+        generation_config["stopping_criteria"] = StoppingCriteriaList(
+            [StoppingCriteriaSub(stop_words=request.stop, tokenizer=tokenizer)]
+        )
 
     if request.seed:
         set_seed(request.seed)
 
     outputs = model.generate(**inputs, **generation_config)
 
-    new_tokens = outputs.sequences[0][inputs.input_ids.shape[-1]:]
+    new_tokens = outputs.sequences[0][inputs.input_ids.shape[-1] :]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
 
    return {
        "choices": [
-            {
-                "message": {
-                    "role": "assistant",
-                    "content": generated_text
-                }
-            },
+            {"message": {"role": "assistant", "content": generated_text}},
        ],
        "metadata": {
            "model": args.model,
@@ -175,23 +255,26 @@ def chat_completions(request: ChatCompletionRequest):
             "logits": [logits.tolist() for logits in outputs.logits],
             "model_input": chat,
             "model_chat_format": tokenizer.chat_template,
-        }
+        },
     }
 
+
 @app.post("/tokenize")
 def tokenize(request: TokenizeRequest):
     tokens = tokenizer.encode(request.input, add_special_tokens=request.add_special_tokens)
     return {"tokens": tokens}
 
+
 @app.post("/detokenize")
 def detokenize(request: DetokenizeRequest):
     text = tokenizer.decode(request.tokens, skip_special_tokens=request.skip_special_tokens)
     return {"text": text}
 
+
 def huggingface_server():
     # Lazy imports keep optional server dependencies out of module import path.
-    from functools import partial
-    import uvicorn
+    from functools import partial # noqa
+    import uvicorn # noqa
 
     command = partial(uvicorn.run, app)
     return command, args
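
For orientation, here is a minimal client-side sketch of the endpoints reworked above. It is not part of the diff: it assumes the server is running with the argparse defaults shown (--host localhost, --port 8000) and uses the third-party requests package, which this wheel does not itself declare.

# Hypothetical client for the /chat and /tokenize routes defined in huggingface_server.py.
# Assumes the FastAPI app is already serving on localhost:8000; adjust BASE otherwise.
import requests

BASE = "http://localhost:8000"

# /tokenize mirrors TokenizeRequest: {"input": str, "add_special_tokens": bool | None}
tokens = requests.post(f"{BASE}/tokenize", json={"input": "Hello world"}).json()["tokens"]
print(tokens)

# /chat mirrors ChatCompletionRequest; omitted fields fall back to the defaults above.
payload = {
    "messages": [{"role": "user", "content": "Summarize symbolicai in one sentence."}],
    "temperature": 0.7,
    "top_p": 1.0,
    "max_tokens": 256,
    "seed": 42,
}
reply = requests.post(f"{BASE}/chat", json=payload).json()
print(reply["choices"][0]["message"]["content"])

The response shape matches the return dict above: a "choices" list with a single assistant message, plus a "metadata" block carrying the model name, logits, and chat template.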
symai/server/llama_cpp_server.py
CHANGED

@@ -8,15 +8,21 @@ from loguru import logger
 
 def llama_cpp_server():
     parser = argparse.ArgumentParser(description="A wrapper for llama_cpp.", add_help=False)
-    parser.add_argument(
-
-
+    parser.add_argument(
+        "--help", action="store_true", help="Show available options for llama_cpp server."
+    )
+    parser.add_argument(
+        "--env",
+        choices=["python", "cpp"],
+        default="python",
+        help="Choose programming environment (python or cpp)",
+    )
     parser.add_argument("--cpp-server-path", type=str, help="Path to llama.cpp server executable")
 
     main_args, llama_cpp_args = parser.parse_known_args()
 
     if main_args.help:
-        if main_args.env ==
+        if main_args.env == "python":
             command = [sys.executable, "-m", "llama_cpp.server", "--help"]
             subprocess.run(command, check=False)
         else:
@@ -30,7 +36,7 @@ def llama_cpp_server():
             subprocess.run(command, check=False)
         sys.exit(0)
 
-    if main_args.env ==
+    if main_args.env == "cpp":
         if not main_args.cpp_server_path:
             logger.error("Error: --cpp-server-path is required when using cpp environment")
             sys.exit(1)
@@ -41,7 +47,9 @@ def llama_cpp_server():
             main_args.cpp_server_path,
             *llama_cpp_args,
         ]
-        llama_cpp_args = [
+        llama_cpp_args = [
+            arg for arg in llama_cpp_args if not arg.startswith("--embedding")
+        ]  # Exclude embedding argument
     else: # python
         command = [
             sys.executable,
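
For reference, the default --env python branch ultimately shells out to llama-cpp-python's bundled server module, just as the --help branch above does. A hedged sketch of the equivalent direct invocation follows; the model path and port are placeholders, and --model/--port are flags of llama_cpp.server itself, not of this wrapper.

# Hypothetical direct launch of the same server the wrapper targets in python mode.
import subprocess
import sys

command = [
    sys.executable, "-m", "llama_cpp.server",
    "--model", "/path/to/model.gguf",  # placeholder path
    "--port", "8080",                  # placeholder port
]
subprocess.run(command, check=False)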
symai/server/qdrant_server.py
ADDED

@@ -0,0 +1,206 @@
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+
+from loguru import logger
+
+
+def qdrant_server(): # noqa
+    """
+    A wrapper for Qdrant server that supports both Docker and binary execution modes.
+
+    Returns:
+        tuple: (command, args) where command is the list to execute and args are the parsed arguments
+    """
+    parser = argparse.ArgumentParser(description="A wrapper for Qdrant server.", add_help=False)
+    parser.add_argument(
+        "--help", action="store_true", help="Show available options for Qdrant server."
+    )
+    parser.add_argument(
+        "--env",
+        choices=["docker", "binary"],
+        default="docker",
+        help="Choose execution environment (docker or binary)",
+    )
+    parser.add_argument("--binary-path", type=str, help="Path to Qdrant binary executable")
+    parser.add_argument(
+        "--docker-image",
+        type=str,
+        default="qdrant/qdrant:latest",
+        help="Docker image to use (default: qdrant/qdrant:latest)",
+    )
+    parser.add_argument(
+        "--host", type=str, default="0.0.0.0", help="Host address to bind to (default: 0.0.0.0)"
+    )
+    parser.add_argument("--port", type=int, default=6333, help="REST API port (default: 6333)")
+    parser.add_argument("--grpc-port", type=int, default=6334, help="gRPC API port (default: 6334)")
+    parser.add_argument(
+        "--storage-path",
+        type=str,
+        default="./qdrant_storage",
+        help="Path to Qdrant storage directory (default: ./qdrant_storage)",
+    )
+    parser.add_argument(
+        "--config-path", type=str, default=None, help="Path to Qdrant configuration file"
+    )
+    parser.add_argument(
+        "--docker-container-name",
+        type=str,
+        default="qdrant",
+        help="Name for Docker container (default: qdrant)",
+    )
+    parser.add_argument(
+        "--docker-remove",
+        action="store_true",
+        default=True,
+        help="Remove container when it stops (default: True)",
+    )
+    parser.add_argument(
+        "--docker-detach",
+        action="store_true",
+        default=False,
+        help="Run Docker container in detached mode (default: False)",
+    )
+
+    main_args, qdrant_args = parser.parse_known_args()
+
+    if main_args.help:
+        if main_args.env == "docker":
+            # Show Docker help
+            command = ["docker", "run", "--rm", main_args.docker_image, "--help"]
+            subprocess.run(command, check=False)
+        else:
+            if not main_args.binary_path:
+                logger.error("Error: --binary-path is required when using binary environment")
+                sys.exit(1)
+            if not Path(main_args.binary_path).exists():
+                logger.error(f"Error: Binary not found at {main_args.binary_path}")
+                sys.exit(1)
+            command = [main_args.binary_path, "--help"]
+            subprocess.run(command, check=False)
+        sys.exit(0)
+
+    if main_args.env == "binary":
+        if not main_args.binary_path:
+            logger.error("Error: --binary-path is required when using binary environment")
+            sys.exit(1)
+        if not Path(main_args.binary_path).exists():
+            logger.error(f"Error: Binary not found at {main_args.binary_path}")
+            sys.exit(1)
+
+        # Build command for binary execution
+        command = [main_args.binary_path]
+
+        # Ensure storage directory exists
+        storage_path = Path(main_args.storage_path)
+        storage_path.mkdir(parents=True, exist_ok=True)
+        abs_storage_path = str(storage_path.resolve())
+
+        # Add standard Qdrant arguments
+        # Set storage path via environment variable or command argument
+        # Qdrant binary accepts --storage-path argument
+        command.extend(["--storage-path", abs_storage_path])
+
+        # Add host, port, and grpc-port arguments
+        command.extend(["--host", main_args.host])
+        command.extend(["--port", str(main_args.port)])
+        command.extend(["--grpc-port", str(main_args.grpc_port)])
+
+        if main_args.config_path:
+            command.extend(["--config-path", main_args.config_path])
+
+        # Add any additional Qdrant-specific arguments
+        command.extend(qdrant_args)
+
+    else: # docker
+        # Ensure storage directory exists
+        storage_path = Path(main_args.storage_path)
+        storage_path.mkdir(parents=True, exist_ok=True)
+        abs_storage_path = str(storage_path.resolve())
+
+        # Build Docker command
+        command = ["docker", "run"]
+
+        # Container management options
+        if main_args.docker_remove:
+            command.append("--rm")
+
+        if main_args.docker_detach:
+            command.append("-d")
+        # Note: We don't add -it by default to avoid issues in non-interactive environments
+        # Users can add it manually if needed via qdrant_args
+
+        # Container name
+        command.extend(["--name", main_args.docker_container_name])
+
+        # Port mappings
+        command.extend(["-p", f"{main_args.port}:6333"])
+        command.extend(["-p", f"{main_args.grpc_port}:6334"])
+
+        # Volume mount for storage
+        command.extend(["-v", f"{abs_storage_path}:/qdrant/storage:z"])
+
+        # Volume mount for config (if provided)
+        # Note: Qdrant Docker image accepts environment variables and config files
+        # For custom config, mount it as a volume before the image name
+        if main_args.config_path:
+            config_path = Path(main_args.config_path)
+            abs_config_path = config_path.resolve()
+            config_dir = str(abs_config_path.parent)
+            command.extend(["-v", f"{config_dir}:/qdrant/config:z"])
+            # Qdrant looks for config.yaml in /qdrant/config by default
+
+        # Set storage path environment variable to use the mounted volume
+        command.extend(["-e", "QDRANT__STORAGE__STORAGE_PATH=/qdrant/storage"])
+
+        # Docker image
+        command.append(main_args.docker_image)
+
+        # Qdrant server arguments (if any additional ones are passed)
+
+        # Add any additional Qdrant arguments
+        if qdrant_args:
+            command.extend(qdrant_args)
+
+    # Prepare args for config storage (similar to llama_cpp_server pattern)
+    # Extract key-value pairs for configuration
+    config_args = []
+    if main_args.env == "docker":
+        config_args = [
+            "--env",
+            main_args.env,
+            "--host",
+            main_args.host,
+            "--port",
+            str(main_args.port),
+            "--grpc-port",
+            str(main_args.grpc_port),
+            "--storage-path",
+            main_args.storage_path,
+            "--docker-image",
+            main_args.docker_image,
+            "--docker-container-name",
+            main_args.docker_container_name,
+        ]
+        if main_args.config_path:
+            config_args.extend(["--config-path", main_args.config_path])
+    else:
+        config_args = [
+            "--env",
+            main_args.env,
+            "--binary-path",
+            main_args.binary_path,
+            "--host",
+            main_args.host,
+            "--port",
+            str(main_args.port),
+            "--grpc-port",
+            str(main_args.grpc_port),
+            "--storage-path",
+            main_args.storage_path,
+        ]
+        if main_args.config_path:
+            config_args.extend(["--config-path", main_args.config_path])
+
+    return command, config_args
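
A minimal consumer sketch, not shown in the diff: qdrant_server() only assembles the launch command, so the caller still has to execute it. This assumes the module is importable as symai.server.qdrant_server and that Docker is available for the default --env docker mode; note that the function reads its options from the calling process's command line via parse_known_args().

# Hypothetical usage of the wrapper's documented (command, args) return value.
import subprocess

from symai.server.qdrant_server import qdrant_server

command, config_args = qdrant_server()  # e.g. ["docker", "run", "--rm", ...]
print("launching:", " ".join(command))
subprocess.run(command, check=False)    # blocks unless --docker-detach was passed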