symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +96 -64
- symai/backend/base.py +93 -80
- symai/backend/engines/drawing/engine_bfl.py +12 -11
- symai/backend/engines/drawing/engine_gpt_image.py +108 -87
- symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
- symai/backend/engines/embedding/engine_openai.py +3 -5
- symai/backend/engines/execute/engine_python.py +6 -5
- symai/backend/engines/files/engine_io.py +74 -67
- symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
- symai/backend/engines/index/engine_pinecone.py +23 -24
- symai/backend/engines/index/engine_vectordb.py +16 -14
- symai/backend/engines/lean/engine_lean4.py +38 -34
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
- symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
- symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
- symai/backend/engines/ocr/engine_apilayer.py +6 -8
- symai/backend/engines/output/engine_stdout.py +1 -4
- symai/backend/engines/search/engine_openai.py +7 -7
- symai/backend/engines/search/engine_perplexity.py +5 -5
- symai/backend/engines/search/engine_serpapi.py +12 -14
- symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
- symai/backend/engines/text_to_speech/engine_openai.py +5 -7
- symai/backend/engines/text_vision/engine_clip.py +7 -11
- symai/backend/engines/userinput/engine_console.py +3 -3
- symai/backend/engines/webscraping/engine_requests.py +81 -48
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +4 -2
- symai/backend/mixin/deepseek.py +2 -0
- symai/backend/mixin/google.py +2 -0
- symai/backend/mixin/openai.py +11 -3
- symai/backend/settings.py +83 -16
- symai/chat.py +101 -78
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +77 -69
- symai/collect/pipeline.py +35 -27
- symai/collect/stats.py +75 -63
- symai/components.py +198 -169
- symai/constraints.py +15 -12
- symai/core.py +698 -359
- symai/core_ext.py +32 -34
- symai/endpoints/api.py +80 -73
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +11 -8
- symai/extended/arxiv_pdf_parser.py +13 -12
- symai/extended/bibtex_parser.py +2 -3
- symai/extended/conversation.py +101 -90
- symai/extended/document.py +17 -10
- symai/extended/file_merger.py +18 -13
- symai/extended/graph.py +18 -13
- symai/extended/html_style_template.py +2 -4
- symai/extended/interfaces/blip_2.py +1 -2
- symai/extended/interfaces/clip.py +1 -2
- symai/extended/interfaces/console.py +7 -1
- symai/extended/interfaces/dall_e.py +1 -1
- symai/extended/interfaces/flux.py +1 -1
- symai/extended/interfaces/gpt_image.py +1 -1
- symai/extended/interfaces/input.py +1 -1
- symai/extended/interfaces/llava.py +0 -1
- symai/extended/interfaces/naive_vectordb.py +7 -8
- symai/extended/interfaces/naive_webscraping.py +1 -1
- symai/extended/interfaces/ocr.py +1 -1
- symai/extended/interfaces/pinecone.py +6 -5
- symai/extended/interfaces/serpapi.py +1 -1
- symai/extended/interfaces/terminal.py +2 -3
- symai/extended/interfaces/tts.py +1 -1
- symai/extended/interfaces/whisper.py +1 -1
- symai/extended/interfaces/wolframalpha.py +1 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +11 -13
- symai/extended/os_command.py +17 -16
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +19 -16
- symai/extended/packages/sympkg.py +12 -9
- symai/extended/packages/symrun.py +21 -19
- symai/extended/repo_cloner.py +11 -10
- symai/extended/seo_query_optimizer.py +1 -2
- symai/extended/solver.py +20 -23
- symai/extended/summarizer.py +4 -3
- symai/extended/taypan_interpreter.py +10 -12
- symai/extended/vectordb.py +99 -82
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +12 -16
- symai/formatter/regex.py +62 -63
- symai/functional.py +176 -122
- symai/imports.py +136 -127
- symai/interfaces.py +56 -27
- symai/memory.py +14 -13
- symai/misc/console.py +49 -39
- symai/misc/loader.py +5 -3
- symai/models/__init__.py +17 -1
- symai/models/base.py +269 -181
- symai/models/errors.py +0 -1
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +11 -15
- symai/ops/primitives.py +348 -228
- symai/post_processors.py +32 -28
- symai/pre_processors.py +39 -41
- symai/processor.py +6 -4
- symai/prompts.py +59 -45
- symai/server/huggingface_server.py +23 -20
- symai/server/llama_cpp_server.py +7 -5
- symai/shell.py +3 -4
- symai/shellsv.py +499 -375
- symai/strategy.py +517 -287
- symai/symbol.py +111 -116
- symai/utils.py +42 -36
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
- symbolicai-1.0.0.dist-info/RECORD +163 -0
- symbolicai-0.20.2.dist-info/RECORD +0 -162
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
symai/backend/engines/files/engine_io.py:

```diff
@@ -1,5 +1,5 @@
+import contextlib
 import logging
-import os
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -7,21 +7,59 @@ import pypdf
 import tika
 from tika import unpack
 
+from ....utils import UserMessage
 from ...base import Engine
 
 # Initialize Tika lazily to avoid spawning JVMs prematurely for all workers
-
+_TIKA_STATE = {"initialized": False}
 
 def _ensure_tika_vm():
-
-
-        try:
+    if not _TIKA_STATE["initialized"]:
+        with contextlib.suppress(Exception):
             tika.initVM()
-        except Exception:
-            # If initVM fails, we still attempt unpack.from_file which may auto-init
-            pass
         logging.getLogger('tika').setLevel(logging.CRITICAL)
-
+        _TIKA_STATE["initialized"] = True
+
+
+def _int_or_none(value):
+    return int(value) if value != '' else None
+
+
+def _parse_slice_token(token):
+    if ':' not in token:
+        return int(token)
+    parts = token.split(':')
+    if len(parts) == 2:
+        start, end = parts
+        return slice(_int_or_none(start), _int_or_none(end), None)
+    if len(parts) == 3:
+        start, end, step = parts
+        return slice(_int_or_none(start), _int_or_none(end), _int_or_none(step))
+    return None
+
+
+def _parse_slice_spec(file_path):
+    if '[' not in file_path or ']' not in file_path:
+        return file_path, None
+    path_part, remainder = file_path.split('[', 1)
+    slice_section = remainder.split(']', 1)[0]
+    slices = []
+    for token in slice_section.split(','):
+        if token == '':
+            continue
+        parsed = _parse_slice_token(token)
+        if parsed is not None:
+            slices.append(parsed)
+    return path_part, slices or None
+
+
+def _apply_slices(lines, slices_):
+    if slices_ is None:
+        return lines
+    new_content = []
+    for slice_item in slices_:
+        new_content.extend(lines[slice_item])
+    return new_content
 
 
 @dataclass
```
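The inline slice-parsing loop from 0.20.2 is factored into the three helpers above. A minimal sketch of how a bracketed slice spec now resolves, using only the helpers exactly as they appear in this hunk:

```python
# 'notes.txt[0:10:2,15:20]' -> clean path plus ordered Python slice objects.
path, slices_ = _parse_slice_spec('notes.txt[0:10:2,15:20]')
assert path == 'notes.txt'
assert slices_ == [slice(0, 10, 2), slice(15, 20, None)]

# _apply_slices concatenates the selections in order:
# lines 0, 2, 4, 6, 8 followed by lines 15..19.
lines = [f'line {i}' for i in range(30)]
selected = _apply_slices(lines, slices_)
assert selected[0] == 'line 0' and selected[-1] == 'line 19'
```

The token grammar mirrors Python's own slice syntax, so `start`, `end`, and `step` may each be omitted.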
```diff
@@ -42,83 +80,52 @@ class FileEngine(Engine):
     def _read_slice_file(self, file_path, argument):
         # check if file is empty
         with_metadata = argument.kwargs.get('with_metadata', False)
-
+        file_id = Path(argument.prop.prepared_input).stem.replace(' ', '_')
         if file_path is None or file_path.strip() == '':
             return None
 
         # check if file slice is used
-        slices_ =
-
-
-            file_path = file_parts[0]
-            # remove string up to '[' and after ']'
-            slices_s = file_parts[1].split(']')[0].split(',')
-            slices_ = []
-            for s in slices_s:
-                if s == '':
-                    continue
-                elif ':' in s:
-                    s_split = s.split(':')
-                    if len(s_split) == 2:
-                        start_slice = int(s_split[0]) if s_split[0] != '' else None
-                        end_slice = int(s_split[1]) if s_split[1] != '' else None
-                        slices_.append(slice(start_slice, end_slice, None))
-                    elif len(s_split) == 3:
-                        start_slice = int(s_split[0]) if s_split[0] != '' else None
-                        end_slice = int(s_split[1]) if s_split[1] != '' else None
-                        step_slice = int(s_split[2]) if s_split[2] != '' else None
-                        slices_.append(slice(start_slice, end_slice, step_slice))
-                else:
-                    slices_.append(int(s))
+        file_path, slices_ = _parse_slice_spec(file_path)
+
+        path_obj = Path(file_path)
 
         # check if file exists
-        assert
+        assert path_obj.exists(), f'File does not exist: {file_path}'
 
         # verify if file is empty
-        if
+        if path_obj.stat().st_size <= 0:
             return ''
 
         # For common plain-text extensions, avoid Tika overhead
-        ext =
+        ext = path_obj.suffix.lower()
         if ext in {'.txt', '.md', '.py', '.json', '.yaml', '.yml', '.csv', '.tsv', '.log'}:
             try:
-                with open(
+                with path_obj.open(encoding='utf-8', errors='ignore') as f:
                     content = f.read()
                 if content is None:
                     return None
                 # Apply slicing by lines, mirroring the Tika branch
                 lines = content.split('\n')
-
-                new_content = []
-                for s in slices_:
-                    new_content.extend(lines[s])
-                lines = new_content
+                lines = _apply_slices(lines, slices_)
                 content = '\n'.join(lines)
                 content = content.encode('utf8', 'ignore').decode('utf8', 'ignore')
-                return content if not with_metadata else [TextContainer(
+                return content if not with_metadata else [TextContainer(file_id, None, content)]
             except Exception:
                 # Fallback to Tika if plain read fails
                 pass
 
         _ensure_tika_vm()
-        file_ = unpack.from_file(str(
-        if 'content' in file_
-            content = file_['content']
-        else:
-            content = str(file_)
+        file_ = unpack.from_file(str(path_obj))
+        content = file_['content'] if 'content' in file_ else str(file_)
 
         if content is None:
             return None
         content = content.split('\n')
 
-
-        new_content = []
-        for s in slices_:
-            new_content.extend(content[s])
-        content = new_content
+        content = _apply_slices(content, slices_)
         content = '\n'.join(content)
         content = content.encode('utf8', 'ignore').decode('utf8', 'ignore')
-        return content if not with_metadata else [TextContainer(
+        return content if not with_metadata else [TextContainer(file_id, None, content)]
 
 
     def reset_eof_of_pdf_return_stream(self, pdf_stream_in: list):
```
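The rewrite turns `_read_slice_file` into a short pipeline: strip the slice spec, validate via `pathlib`, read common plain-text extensions directly, and only then fall back to Tika. A standalone sketch of the fast path under those assumptions (helper functions as defined in the earlier hunk; `read_plain_text` is an illustrative name, not part of the module):

```python
from pathlib import Path

PLAIN_TEXT_EXTS = {'.txt', '.md', '.py', '.json', '.yaml', '.yml', '.csv', '.tsv', '.log'}

def read_plain_text(file_path: str) -> str | None:
    # Split 'notes.txt[0:10]' into the real path and the optional slices.
    file_path, slices_ = _parse_slice_spec(file_path)
    path_obj = Path(file_path)
    assert path_obj.exists(), f'File does not exist: {file_path}'
    if path_obj.stat().st_size <= 0:
        return ''
    if path_obj.suffix.lower() not in PLAIN_TEXT_EXTS:
        return None  # caller falls back to the Tika branch
    with path_obj.open(encoding='utf-8', errors='ignore') as f:
        lines = f.read().split('\n')
    return '\n'.join(_apply_slices(lines, slices_))
```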
```diff
@@ -127,7 +134,7 @@ class FileEngine(Engine):
         for i, x in enumerate(pdf_stream_in[::-1]):
             if b'%%EOF' in x:
                 actual_line = len(pdf_stream_in)-i
-
+                UserMessage(f'EOF found at line position {-i} = actual {actual_line}, with value {x}')
                 break
 
         # return the list up to that point
@@ -135,31 +142,31 @@ class FileEngine(Engine):
 
     def fix_pdf(self, file_path: str):
         # opens the file for reading
-
+        path_obj = Path(file_path)
+        with path_obj.open('rb') as p:
             txt = (p.readlines())
 
         # get the new list terminating correctly
         txtx = self.reset_eof_of_pdf_return_stream(txt)
 
         # write to new pdf
-        new_file_path = f'{file_path}_fixed.pdf'
-        with open(
+        new_file_path = Path(f'{file_path}_fixed.pdf')
+        with new_file_path.open('wb') as f:
             f.writelines(txtx)
 
-
-        return fixed_pdf
+        return pypdf.PdfReader(str(new_file_path))
 
     def read_text(self, pdf_reader, page_range, argument):
         txt = []
         n_pages = len(pdf_reader.pages)
         with_metadata = argument.kwargs.get('with_metadata', False)
-
+        file_id = Path(argument.prop.prepared_input).stem.replace(' ', '_')
         for i in range(n_pages)[slice(0, n_pages) if page_range is None else page_range]:
             page = pdf_reader.pages[i]
             extracted = page.extract_text()
             extracted = extracted.encode('utf8', 'ignore').decode('utf8', 'ignore')
             if with_metadata:
-                txt.append(TextContainer(
+                txt.append(TextContainer(file_id, str(i), extracted))
             else:
                 txt.append(extracted)
 
@@ -173,17 +180,17 @@ class FileEngine(Engine):
         page_range = None
         if 'slice' in kwargs:
             page_range = kwargs['slice']
-            if isinstance(page_range, tuple
+            if isinstance(page_range, (tuple, list)):
                 page_range = slice(*page_range)
 
         rsp = ''
         try:
-            with
+            with Path(path).open('rb') as f:
                 # creating a pdf reader object
                 pdf_reader = pypdf.PdfReader(f)
                 rsp = self.read_text(pdf_reader, page_range, argument)
         except Exception as e:
-
+            UserMessage(f'Error reading PDF: {e} | {path}')
             if 'fix_pdf' not in kwargs or not kwargs['fix_pdf']:
                 raise e
             fixed_pdf = self.fix_pdf(str(path))
@@ -193,11 +200,11 @@ class FileEngine(Engine):
             try:
                 rsp = self._read_slice_file(path, argument)
             except Exception as e:
-
+                UserMessage(f'Error reading empty file: {e} | {path}')
                 raise e
 
         if rsp is None:
-
+            UserMessage(f'Error reading file - empty result: {path}', raise_with=Exception)
 
         metadata = {}
 
```
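Throughout this release, ad-hoc prints and bare raises are routed through `symai.utils.UserMessage`. Its implementation is not part of this diff; a hypothetical minimal sketch of the contract implied by the call sites (a message plus an optional `raise_with` exception type):

```python
# Hypothetical sketch only; the real symai.utils.UserMessage is not shown in
# this diff. Implied contract: surface the message to the user, and raise it
# wrapped in `raise_with` when that keyword is supplied.
def UserMessage(message: str, raise_with: type[BaseException] | None = None) -> None:
    print(message)  # the real helper presumably routes through symai's console utilities
    if raise_with is not None:
        raise raise_with(message)
```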
symai/backend/engines/imagecaptioning/engine_blip2.py:

```diff
@@ -1,4 +1,3 @@
-from typing import List
 
 import requests
 import torch
@@ -10,6 +9,7 @@ except ImportError:
 
 from PIL import Image
 
+from ....utils import UserMessage
 from ...base import Engine
 from ...settings import SYMAI_CONFIG
 
@@ -43,7 +43,7 @@ class Blip2Engine(Engine):
 
     def forward(self, argument):
         if load_model_and_preprocess is None:
-
+            UserMessage('Blip2 is not installed. Please install it with `pip install symbolicai[blip2]`', raise_with=ImportError)
         if self.model is None:
             self.model, self.vis_processors, self.txt_processors = load_model_and_preprocess(name = self.name_id,
                                                                                              model_type = self.model_id,
@@ -52,7 +52,7 @@ class Blip2Engine(Engine):
 
         image, prompt = argument.prop.prepared_input
         kwargs = argument.kwargs
-        except_remedy = kwargs
+        except_remedy = kwargs.get('except_remedy')
 
         if 'http' in image:
             image = Image.open(requests.get(image, stream=True).raw).convert('RGB')
```
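The `load_model_and_preprocess is None` guard pairs with the `except ImportError:` context visible in the second hunk: the module imports the BLIP-2 loader optionally and defers the failure to `forward`. A sketch of the pattern (the exact import statement sits outside the hunks shown, so the `lavis` path below is an assumption):

```python
# Optional-dependency pattern assumed from the hunk context above.
try:
    from lavis.models import load_model_and_preprocess  # assumed import path
except ImportError:
    load_model_and_preprocess = None

def forward(argument):
    if load_model_and_preprocess is None:
        UserMessage('Blip2 is not installed. Please install it with '
                    '`pip install symbolicai[blip2]`', raise_with=ImportError)
    ...
```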
symai/backend/engines/imagecaptioning/engine_llavacpp_client.py:

```diff
@@ -1,15 +1,16 @@
-import logging
-import requests
-import json
 import io
+import json
+import logging
+from pathlib import Path
 
-
-from requests_toolbelt.multipart.encoder import MultipartEncoder
+import requests
 from PIL.Image import Image
+from requests_toolbelt.multipart.encoder import MultipartEncoder
 
+from ....symbol import Result
+from ....utils import UserMessage
 from ...base import Engine
 from ...settings import SYMAI_CONFIG
-from ....symbol import Result
 
 
 def image_to_byte_array(image: Image, format='PNG') -> bytes:
@@ -18,8 +19,7 @@ def image_to_byte_array(image: Image, format='PNG') -> bytes:
     # image.save expects a file-like as a argument
     image.save(imgByteArr, format=format)
     # Turn the BytesIO object back into a bytes object
-
-    return imgByteArr
+    return imgByteArr.getvalue()
 
 
 class LLaMAResult(Result):
@@ -74,7 +74,7 @@ class LLaMACppClientEngine(Engine):
             im_bytes = image_to_byte_array(image['content'], format=format_)
         else:
             # Convert image to bytes, open as binary
-            with
+            with Path(image['content']).open('rb') as f:
                 im_bytes = f.read()
         # Create multipart/form-data payload
         payload = MultipartEncoder(
@@ -87,7 +87,7 @@ class LLaMACppClientEngine(Engine):
         # Update the headers for multipart/form-data
         headers = {'Content-Type': payload.content_type}
         api = f'http://{self.host}:{self.port}/llava'
-        except_remedy = kwargs
+        except_remedy = kwargs.get('except_remedy')
         try:
             # use http localhost 8000 to send a request to the server
             rsp = requests.post(api, data=payload, headers=headers, timeout=self.timeout)
@@ -95,7 +95,8 @@ class LLaMACppClientEngine(Engine):
         except Exception as e:
             if except_remedy is None:
                 raise e
-            callback
+            def callback():
+                return requests.post(api, data=payload, headers=headers, timeout=self.timeout)
             res = except_remedy(self, e, callback, argument)
 
         metadata = {}
@@ -105,17 +106,15 @@ class LLaMACppClientEngine(Engine):
         output = rsp if isinstance(prompts, list) else rsp[0]
         return output, metadata
 
-    def
-    if argument.prop.raw_input:
-
-
-
-
-
-        user: str = ""
-        system: str = ""
-        system = f'{system}\n' if system and len(system) > 0 else ''
+    def _handle_raw_input(self, argument) -> bool:
+        if not argument.prop.raw_input:
+            return False
+        if not argument.prop.processed_input:
+            UserMessage('Need to provide a prompt instruction to the engine if raw_input is enabled.', raise_with=ValueError)
+        argument.prop.prepared_input = argument.prop.processed_input
+        return True
 
+    def _append_context_sections(self, system: str, argument) -> str:
         ref = argument.prop.instance
         static_ctxt, dyn_ctxt = ref.global_context
         if len(static_ctxt) > 0:
@@ -126,36 +125,53 @@ class LLaMACppClientEngine(Engine):
 
         payload = argument.prop.payload
         if argument.prop.payload:
-            system += f"[ADDITIONAL CONTEXT]\n{
+            system += f"[ADDITIONAL CONTEXT]\n{payload!s}\n\n"
 
-        examples:
+        examples: list[str] = argument.prop.examples
         if examples and len(examples) > 0:
-            system += f"[EXAMPLES]\n{
+            system += f"[EXAMPLES]\n{examples!s}\n\n"
+
+        return system
 
+    def _build_user_instruction(self, argument) -> str:
+        user = ""
         if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
             val = str(argument.prop.prompt)
-            # in this engine, instructions are considered as user prompts
             user += f"[INSTRUCTION]\n{val}"
+        return user
 
-
-
+    def _extract_system_instructions(self, argument, system: str, suffix: str) -> tuple[str, str]:
         if '[SYSTEM_INSTRUCTION::]: <<<' in suffix and argument.prop.parse_system_instructions:
             parts = suffix.split('\n>>>\n')
-
-
-
-
-
-                    c += 1
+            consumed = 0
+            for part in parts:
+                if 'SYSTEM_INSTRUCTION' in part:
+                    system += f"{part}\n"
+                    consumed += 1
                 else:
                     break
-
-
-        user += f"{suffix}"
+            suffix = '\n>>>\n'.join(parts[consumed:])
+        return system, suffix
 
+    def _append_template_suffix(self, user: str, argument) -> str:
         if argument.prop.template_suffix:
-            user += f"\n[[PLACEHOLDER]]\n{
-        user +=
+            user += f"\n[[PLACEHOLDER]]\n{argument.prop.template_suffix!s}\n\n"
+        user += "Only generate content for the placeholder `[[PLACEHOLDER]]` following the instructions and context information. Do NOT write `[[PLACEHOLDER]]` or anything else in your output.\n\n"
+        return user
+
+    def prepare(self, argument):
+        if self._handle_raw_input(argument):
+            return
+
+        system: str = ""
+        system = f'{system}\n' if system and len(system) > 0 else ''
+        system = self._append_context_sections(system, argument)
+
+        user = self._build_user_instruction(argument)
+        suffix: str = str(argument.prop.processed_input)
+        system, suffix = self._extract_system_instructions(argument, system, suffix)
+        user += f"{suffix}"
+        user = self._append_template_suffix(user, argument)
 
         user_prompt = { "role": "user", "content": user }
         argument.prop.prepared_input = [
```
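Replacing the truncated `callback` assignment with a nested function makes the retry hook explicit: when a request fails and the caller supplied an `except_remedy`, the engine hands it the error plus a zero-argument callable that re-issues the original POST. A sketch of a remedy handler matching the call `except_remedy(self, e, callback, argument)` (the handler name and retry policy are illustrative, not part of the library):

```python
# Sketch of a remedy handler for the hook signature used above.
def log_and_retry(engine, error, callback, argument):
    UserMessage(f'{type(error).__name__} from llava endpoint, retrying once: {error}')
    return callback()  # re-runs requests.post(api, data=payload, ...)
```

Passing `except_remedy=log_and_retry` through the engine kwargs would then retry once instead of re-raising.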
symai/backend/engines/index/engine_pinecone.py:

```diff
@@ -1,17 +1,16 @@
+import contextlib
 import itertools
 import warnings
-import numpy as np
 
-warnings.filterwarnings('ignore', module='pinecone')
-try:
-    from pinecone import Pinecone, ServerlessSpec
-except:
-    pass
-
-from ...base import Engine
-from ...settings import SYMAI_CONFIG
 from .... import core_ext
 from ....symbol import Result
+from ....utils import UserMessage
+from ...base import Engine
+from ...settings import SYMAI_CONFIG
+
+warnings.filterwarnings('ignore', module='pinecone')
+with contextlib.suppress(BaseException):
+    from pinecone import Pinecone, ServerlessSpec
 
 
 def chunks(iterable, batch_size=100):
@@ -47,23 +46,23 @@ class PineconeResult(Result):
             return
 
         for i, match in enumerate(self.value):
-
-            if
-                m =
+            match_value = match.strip()
+            if match_value.startswith('# ----[FILE_START]') and '# ----[FILE_END]' in match_value:
+                m = match_value.split('[FILE_CONTENT]:')[-1].strip()
                 splits = m.split('# ----[FILE_END]')
-                assert len(splits) >= 2, 'Invalid file format: {}'
+                assert len(splits) >= 2, f'Invalid file format: {splits}'
                 content = splits[0]
                 file_name = ','.join(splits[1:]) # TODO: check why there are multiple file names
                 yield file_name.strip(), content.strip()
             else:
-                yield i+1,
+                yield i+1, match_value
 
     def __str__(self):
         str_view = ''
         for filename, content in self._unpack_matches():
             # indent each line of the content
-
-            str_view += f'* {filename}\n{
+            content_view = '\n'.join([' ' + line for line in content.split('\n')])
+            str_view += f'* {filename}\n{content_view}\n\n'
         return f'''
 [RESULT]
 {'-=-' * 13}
@@ -138,7 +137,7 @@ class PineconeIndexEngine(Engine):
     def id(self) -> str:
        if SYMAI_CONFIG['INDEXING_ENGINE_API_KEY']:
             if Pinecone is None:
-
+                UserMessage('Pinecone is not installed. Please install it with `pip install symbolicai[pinecone]`.')
             return 'index'
         return super().id() # default to unregistered
 
@@ -150,13 +149,13 @@ class PineconeIndexEngine(Engine):
         self.environment = kwargs['INDEXING_ENGINE_ENVIRONMENT']
 
     def _configure_index(self, **kwargs):
-        index_name = kwargs
+        index_name = kwargs.get('index_name', self.index_name)
 
-        del_ = kwargs
+        del_ = kwargs.get('index_del', False)
         if self.index is not None and del_:
             self.pinecone.delete_index(index_name)
 
-        get_ = kwargs
+        get_ = kwargs.get('index_get', False)
         if self.index is not None and get_:
             self.index = self.pinecone.Index(name=index_name)
 
@@ -184,9 +183,9 @@ class PineconeIndexEngine(Engine):
             self._configure_index(**kwargs)
 
         if operation == 'search':
-            index_top_k
-            index_values
-            index_metadata = kwargs
+            index_top_k = kwargs.get('index_top_k', self.index_top_k)
+            index_values = kwargs.get('index_values', self.index_values)
+            index_metadata = kwargs.get('index_metadata', self.index_metadata)
             rsp = self._query(embedding, index_top_k, index_values, index_metadata)
 
         elif operation == 'add':
@@ -197,7 +196,7 @@ class PineconeIndexEngine(Engine):
             self._configure_index(**kwargs)
 
         else:
-
+            UserMessage('Invalid operation', raise_with=ValueError)
 
         metadata = {}
 
```
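The `try/except: pass` around the Pinecone import becomes `contextlib.suppress(BaseException)`, and the later `if Pinecone is None:` check reports the missing extra. Note that a suppressed failed import leaves the name unbound, so a `None` fallback must exist somewhere outside these hunks; the sketch below adds one explicitly (an assumption, since that binding is not visible in the diff):

```python
import contextlib

Pinecone = None  # assumed fallback binding; not visible in the hunks above
with contextlib.suppress(BaseException):
    from pinecone import Pinecone, ServerlessSpec

if Pinecone is None:
    UserMessage('Pinecone is not installed. Please install it with '
                '`pip install symbolicai[pinecone]`.')
```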
symai/backend/engines/index/engine_vectordb.py:

```diff
@@ -1,9 +1,10 @@
 import itertools
 from copy import deepcopy
+from typing import ClassVar
 
 from ....extended.vectordb import VectorDB
 from ....symbol import Result
-from ....utils import
+from ....utils import UserMessage
 from ...base import Engine
 from ...settings import SYMAI_CONFIG
 
@@ -40,23 +41,23 @@ class VectorDBResult(Result):
         if not self.value:
             return
         for i, match in enumerate(self.value):
-
-            if
-                m =
+            match_value = match.strip()
+            if match_value.startswith('# ----[FILE_START]') and '# ----[FILE_END]' in match_value:
+                m = match_value.split('[FILE_CONTENT]:')[-1].strip()
                 splits = m.split('# ----[FILE_END]')
-                assert len(splits) >= 2, 'Invalid file format: {}'
+                assert len(splits) >= 2, f'Invalid file format: {splits}'
                 content = splits[0]
                 file_name = ','.join(splits[1:]) # TODO: check why there are multiple file names
                 yield file_name.strip(), content.strip()
             else:
-                yield i+1,
+                yield i+1, match_value
 
     def __str__(self):
         str_view = ''
         for filename, content in self._unpack_matches():
             # indent each line of the content
-
-            str_view += f'* {filename}\n{
+            content_view = '\n'.join([' ' + line for line in content.split('\n')])
+            str_view += f'* {filename}\n{content_view}\n\n'
         return f'''
 [RESULT]
 {'-=-' * 13}
@@ -85,8 +86,8 @@ class VectorDBIndexEngine(Engine):
     _default_index_dims = 768
     _default_index_top_k = 5
     _default_index_metric = 'cosine'
-    _index_dict = {}
-    _index_storage_file = None
+    _index_dict: ClassVar[dict[str, object]] = {}
+    _index_storage_file: ClassVar[str | None] = None
     def __init__(
         self,
         index_name=_default_index_name,
@@ -95,7 +96,7 @@ class VectorDBIndexEngine(Engine):
         index_metric=_default_index_metric,
         index_dict=_index_dict,
         index_storage_file=_index_storage_file,
-        **
+        **_kwargs
     ):
         super().__init__()
         self.config = deepcopy(SYMAI_CONFIG)
@@ -131,7 +132,7 @@ class VectorDBIndexEngine(Engine):
 
         if operation == 'search':
             if isinstance(query, list) and len(query) > 1:
-
+                UserMessage('VectorDB indexing engine does not support multiple queries. Pass a single string query instead.', raise_with=ValueError)
             query_vector = self.index[index_name].embedding_function([query])[0]
             results = self.index[index_name](vector=query_vector, top_k=top_k, return_similarities=similarities)
             rsp = [{'metadata': {'text': result}} for result in results]
@@ -154,9 +155,9 @@ class VectorDBIndexEngine(Engine):
             elif kwargs.get('purge', maybe_as_prompt == 'purge'):
                 self.purge(index_name)
             else:
-
+                UserMessage('Invalid configuration; please use either "load", "save", or "purge".', raise_with=ValueError)
         else:
-
+            UserMessage('Invalid operation; please use either "search", "add", or "config".', raise_with=ValueError)
 
         metadata = {}
         rsp = VectorDBResult(rsp, query[0], None)
@@ -176,6 +177,7 @@ class VectorDBIndexEngine(Engine):
     def prepare(self, argument):
         assert not argument.prop.processed_input, 'VectorDB indexing engine does not support processed_input.'
         argument.prop.prepared_input = argument.prop.prompt
+        argument.prop.limit = 1
 
     def load(self, index_name, storage_file, index_dims, top_k, metric):
         self.index[index_name] = VectorDB(
```
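Annotating the shared class attributes with `ClassVar` documents that `_index_dict` and `_index_storage_file` are process-wide state rather than per-instance fields (the pattern linters such as Ruff flag as RUF012 for mutable class defaults). A minimal sketch of the behavior, with an illustrative class name:

```python
from typing import ClassVar

class EngineSketch:
    # One registry shared by every instance of the class.
    _index_dict: ClassVar[dict[str, object]] = {}

e1, e2 = EngineSketch(), EngineSketch()
e1._index_dict['docs'] = object()
assert 'docs' in e2._index_dict  # mutation is visible across instances
```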
|