symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. symai/__init__.py +96 -64
  2. symai/backend/base.py +93 -80
  3. symai/backend/engines/drawing/engine_bfl.py +12 -11
  4. symai/backend/engines/drawing/engine_gpt_image.py +108 -87
  5. symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
  6. symai/backend/engines/embedding/engine_openai.py +3 -5
  7. symai/backend/engines/execute/engine_python.py +6 -5
  8. symai/backend/engines/files/engine_io.py +74 -67
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
  11. symai/backend/engines/index/engine_pinecone.py +23 -24
  12. symai/backend/engines/index/engine_vectordb.py +16 -14
  13. symai/backend/engines/lean/engine_lean4.py +38 -34
  14. symai/backend/engines/neurosymbolic/__init__.py +41 -13
  15. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
  17. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
  18. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
  19. symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
  20. symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
  21. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
  22. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
  23. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
  24. symai/backend/engines/ocr/engine_apilayer.py +6 -8
  25. symai/backend/engines/output/engine_stdout.py +1 -4
  26. symai/backend/engines/search/engine_openai.py +7 -7
  27. symai/backend/engines/search/engine_perplexity.py +5 -5
  28. symai/backend/engines/search/engine_serpapi.py +12 -14
  29. symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
  30. symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
  31. symai/backend/engines/text_to_speech/engine_openai.py +5 -7
  32. symai/backend/engines/text_vision/engine_clip.py +7 -11
  33. symai/backend/engines/userinput/engine_console.py +3 -3
  34. symai/backend/engines/webscraping/engine_requests.py +81 -48
  35. symai/backend/mixin/__init__.py +13 -0
  36. symai/backend/mixin/anthropic.py +4 -2
  37. symai/backend/mixin/deepseek.py +2 -0
  38. symai/backend/mixin/google.py +2 -0
  39. symai/backend/mixin/openai.py +11 -3
  40. symai/backend/settings.py +83 -16
  41. symai/chat.py +101 -78
  42. symai/collect/__init__.py +7 -1
  43. symai/collect/dynamic.py +77 -69
  44. symai/collect/pipeline.py +35 -27
  45. symai/collect/stats.py +75 -63
  46. symai/components.py +198 -169
  47. symai/constraints.py +15 -12
  48. symai/core.py +698 -359
  49. symai/core_ext.py +32 -34
  50. symai/endpoints/api.py +80 -73
  51. symai/extended/.DS_Store +0 -0
  52. symai/extended/__init__.py +46 -12
  53. symai/extended/api_builder.py +11 -8
  54. symai/extended/arxiv_pdf_parser.py +13 -12
  55. symai/extended/bibtex_parser.py +2 -3
  56. symai/extended/conversation.py +101 -90
  57. symai/extended/document.py +17 -10
  58. symai/extended/file_merger.py +18 -13
  59. symai/extended/graph.py +18 -13
  60. symai/extended/html_style_template.py +2 -4
  61. symai/extended/interfaces/blip_2.py +1 -2
  62. symai/extended/interfaces/clip.py +1 -2
  63. symai/extended/interfaces/console.py +7 -1
  64. symai/extended/interfaces/dall_e.py +1 -1
  65. symai/extended/interfaces/flux.py +1 -1
  66. symai/extended/interfaces/gpt_image.py +1 -1
  67. symai/extended/interfaces/input.py +1 -1
  68. symai/extended/interfaces/llava.py +0 -1
  69. symai/extended/interfaces/naive_vectordb.py +7 -8
  70. symai/extended/interfaces/naive_webscraping.py +1 -1
  71. symai/extended/interfaces/ocr.py +1 -1
  72. symai/extended/interfaces/pinecone.py +6 -5
  73. symai/extended/interfaces/serpapi.py +1 -1
  74. symai/extended/interfaces/terminal.py +2 -3
  75. symai/extended/interfaces/tts.py +1 -1
  76. symai/extended/interfaces/whisper.py +1 -1
  77. symai/extended/interfaces/wolframalpha.py +1 -1
  78. symai/extended/metrics/__init__.py +11 -1
  79. symai/extended/metrics/similarity.py +11 -13
  80. symai/extended/os_command.py +17 -16
  81. symai/extended/packages/__init__.py +29 -3
  82. symai/extended/packages/symdev.py +19 -16
  83. symai/extended/packages/sympkg.py +12 -9
  84. symai/extended/packages/symrun.py +21 -19
  85. symai/extended/repo_cloner.py +11 -10
  86. symai/extended/seo_query_optimizer.py +1 -2
  87. symai/extended/solver.py +20 -23
  88. symai/extended/summarizer.py +4 -3
  89. symai/extended/taypan_interpreter.py +10 -12
  90. symai/extended/vectordb.py +99 -82
  91. symai/formatter/__init__.py +9 -1
  92. symai/formatter/formatter.py +12 -16
  93. symai/formatter/regex.py +62 -63
  94. symai/functional.py +176 -122
  95. symai/imports.py +136 -127
  96. symai/interfaces.py +56 -27
  97. symai/memory.py +14 -13
  98. symai/misc/console.py +49 -39
  99. symai/misc/loader.py +5 -3
  100. symai/models/__init__.py +17 -1
  101. symai/models/base.py +269 -181
  102. symai/models/errors.py +0 -1
  103. symai/ops/__init__.py +32 -22
  104. symai/ops/measures.py +11 -15
  105. symai/ops/primitives.py +348 -228
  106. symai/post_processors.py +32 -28
  107. symai/pre_processors.py +39 -41
  108. symai/processor.py +6 -4
  109. symai/prompts.py +59 -45
  110. symai/server/huggingface_server.py +23 -20
  111. symai/server/llama_cpp_server.py +7 -5
  112. symai/shell.py +3 -4
  113. symai/shellsv.py +499 -375
  114. symai/strategy.py +517 -287
  115. symai/symbol.py +111 -116
  116. symai/utils.py +42 -36
  117. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
  118. symbolicai-1.0.0.dist-info/RECORD +163 -0
  119. symbolicai-0.20.2.dist-info/RECORD +0 -162
  120. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
  121. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
  122. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
  123. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
symai/backend/engines/files/engine_io.py

@@ -1,5 +1,5 @@
+ import contextlib
  import logging
- import os
  from dataclasses import dataclass
  from pathlib import Path

@@ -7,21 +7,59 @@ import pypdf
  import tika
  from tika import unpack

+ from ....utils import UserMessage
  from ...base import Engine

  # Initialize Tika lazily to avoid spawning JVMs prematurely for all workers
- _TIKA_INITIALIZED = False
+ _TIKA_STATE = {"initialized": False}

  def _ensure_tika_vm():
-     global _TIKA_INITIALIZED
-     if not _TIKA_INITIALIZED:
-         try:
+     if not _TIKA_STATE["initialized"]:
+         with contextlib.suppress(Exception):
              tika.initVM()
-         except Exception:
-             # If initVM fails, we still attempt unpack.from_file which may auto-init
-             pass
          logging.getLogger('tika').setLevel(logging.CRITICAL)
-         _TIKA_INITIALIZED = True
+         _TIKA_STATE["initialized"] = True
+
+
+ def _int_or_none(value):
+     return int(value) if value != '' else None
+
+
+ def _parse_slice_token(token):
+     if ':' not in token:
+         return int(token)
+     parts = token.split(':')
+     if len(parts) == 2:
+         start, end = parts
+         return slice(_int_or_none(start), _int_or_none(end), None)
+     if len(parts) == 3:
+         start, end, step = parts
+         return slice(_int_or_none(start), _int_or_none(end), _int_or_none(step))
+     return None
+
+
+ def _parse_slice_spec(file_path):
+     if '[' not in file_path or ']' not in file_path:
+         return file_path, None
+     path_part, remainder = file_path.split('[', 1)
+     slice_section = remainder.split(']', 1)[0]
+     slices = []
+     for token in slice_section.split(','):
+         if token == '':
+             continue
+         parsed = _parse_slice_token(token)
+         if parsed is not None:
+             slices.append(parsed)
+     return path_part, slices or None
+
+
+ def _apply_slices(lines, slices_):
+     if slices_ is None:
+         return lines
+     new_content = []
+     for slice_item in slices_:
+         new_content.extend(lines[slice_item])
+     return new_content


  @dataclass
@@ -42,83 +80,52 @@ class FileEngine(Engine):
      def _read_slice_file(self, file_path, argument):
          # check if file is empty
          with_metadata = argument.kwargs.get('with_metadata', False)
-         id = Path(argument.prop.prepared_input).stem.replace(' ', '_')
+         file_id = Path(argument.prop.prepared_input).stem.replace(' ', '_')
          if file_path is None or file_path.strip() == '':
              return None

          # check if file slice is used
-         slices_ = None
-         if '[' in file_path and ']' in file_path:
-             file_parts = file_path.split('[')
-             file_path = file_parts[0]
-             # remove string up to '[' and after ']'
-             slices_s = file_parts[1].split(']')[0].split(',')
-             slices_ = []
-             for s in slices_s:
-                 if s == '':
-                     continue
-                 elif ':' in s:
-                     s_split = s.split(':')
-                     if len(s_split) == 2:
-                         start_slice = int(s_split[0]) if s_split[0] != '' else None
-                         end_slice = int(s_split[1]) if s_split[1] != '' else None
-                         slices_.append(slice(start_slice, end_slice, None))
-                     elif len(s_split) == 3:
-                         start_slice = int(s_split[0]) if s_split[0] != '' else None
-                         end_slice = int(s_split[1]) if s_split[1] != '' else None
-                         step_slice = int(s_split[2]) if s_split[2] != '' else None
-                         slices_.append(slice(start_slice, end_slice, step_slice))
-                 else:
-                     slices_.append(int(s))
+         file_path, slices_ = _parse_slice_spec(file_path)
+
+         path_obj = Path(file_path)

          # check if file exists
-         assert os.path.exists(file_path), f'File does not exist: {file_path}'
+         assert path_obj.exists(), f'File does not exist: {file_path}'

          # verify if file is empty
-         if os.path.getsize(file_path) <= 0:
+         if path_obj.stat().st_size <= 0:
              return ''

          # For common plain-text extensions, avoid Tika overhead
-         ext = Path(file_path).suffix.lower()
+         ext = path_obj.suffix.lower()
          if ext in {'.txt', '.md', '.py', '.json', '.yaml', '.yml', '.csv', '.tsv', '.log'}:
              try:
-                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                 with path_obj.open(encoding='utf-8', errors='ignore') as f:
                      content = f.read()
                  if content is None:
                      return None
                  # Apply slicing by lines, mirroring the Tika branch
                  lines = content.split('\n')
-                 if slices_ is not None:
-                     new_content = []
-                     for s in slices_:
-                         new_content.extend(lines[s])
-                     lines = new_content
+                 lines = _apply_slices(lines, slices_)
                  content = '\n'.join(lines)
                  content = content.encode('utf8', 'ignore').decode('utf8', 'ignore')
-                 return content if not with_metadata else [TextContainer(id, None, content)]
+                 return content if not with_metadata else [TextContainer(file_id, None, content)]
              except Exception:
                  # Fallback to Tika if plain read fails
                  pass

          _ensure_tika_vm()
-         file_ = unpack.from_file(str(file_path))
-         if 'content' in file_:
-             content = file_['content']
-         else:
-             content = str(file_)
+         file_ = unpack.from_file(str(path_obj))
+         content = file_['content'] if 'content' in file_ else str(file_)

          if content is None:
              return None
          content = content.split('\n')

-         if slices_ is not None:
-             new_content = []
-             for s in slices_:
-                 new_content.extend(content[s])
-             content = new_content
+         content = _apply_slices(content, slices_)
          content = '\n'.join(content)
          content = content.encode('utf8', 'ignore').decode('utf8', 'ignore')
-         return content if not with_metadata else [TextContainer(id, None, content)]
+         return content if not with_metadata else [TextContainer(file_id, None, content)]


      def reset_eof_of_pdf_return_stream(self, pdf_stream_in: list):
@@ -127,7 +134,7 @@ class FileEngine(Engine):
          for i, x in enumerate(pdf_stream_in[::-1]):
              if b'%%EOF' in x:
                  actual_line = len(pdf_stream_in)-i
-                 print(f'EOF found at line position {-i} = actual {actual_line}, with value {x}')
+                 UserMessage(f'EOF found at line position {-i} = actual {actual_line}, with value {x}')
                  break

          # return the list up to that point
@@ -135,31 +142,31 @@ class FileEngine(Engine):

      def fix_pdf(self, file_path: str):
          # opens the file for reading
-         with open(file_path, 'rb') as p:
+         path_obj = Path(file_path)
+         with path_obj.open('rb') as p:
              txt = (p.readlines())

          # get the new list terminating correctly
          txtx = self.reset_eof_of_pdf_return_stream(txt)

          # write to new pdf
-         new_file_path = f'{file_path}_fixed.pdf'
-         with open(new_file_path, 'wb') as f:
+         new_file_path = Path(f'{file_path}_fixed.pdf')
+         with new_file_path.open('wb') as f:
              f.writelines(txtx)

-         fixed_pdf = pypdf.PdfReader(new_file_path)
-         return fixed_pdf
+         return pypdf.PdfReader(str(new_file_path))

      def read_text(self, pdf_reader, page_range, argument):
          txt = []
          n_pages = len(pdf_reader.pages)
          with_metadata = argument.kwargs.get('with_metadata', False)
-         id = Path(argument.prop.prepared_input).stem.replace(' ', '_')
+         file_id = Path(argument.prop.prepared_input).stem.replace(' ', '_')
          for i in range(n_pages)[slice(0, n_pages) if page_range is None else page_range]:
              page = pdf_reader.pages[i]
              extracted = page.extract_text()
              extracted = extracted.encode('utf8', 'ignore').decode('utf8', 'ignore')
              if with_metadata:
-                 txt.append(TextContainer(id, str(i), extracted))
+                 txt.append(TextContainer(file_id, str(i), extracted))
              else:
                  txt.append(extracted)

@@ -173,17 +180,17 @@ class FileEngine(Engine):
          page_range = None
          if 'slice' in kwargs:
              page_range = kwargs['slice']
-             if isinstance(page_range, tuple) or isinstance(page_range, list):
+             if isinstance(page_range, (tuple, list)):
                  page_range = slice(*page_range)

          rsp = ''
          try:
-             with open(str(path), 'rb') as f:
+             with Path(path).open('rb') as f:
                  # creating a pdf reader object
                  pdf_reader = pypdf.PdfReader(f)
                  rsp = self.read_text(pdf_reader, page_range, argument)
          except Exception as e:
-             print(f'Error reading PDF: {e} | {path}')
+             UserMessage(f'Error reading PDF: {e} | {path}')
              if 'fix_pdf' not in kwargs or not kwargs['fix_pdf']:
                  raise e
              fixed_pdf = self.fix_pdf(str(path))
@@ -193,11 +200,11 @@ class FileEngine(Engine):
          try:
              rsp = self._read_slice_file(path, argument)
          except Exception as e:
-             print(f'Error reading empty file: {e} | {path}')
+             UserMessage(f'Error reading empty file: {e} | {path}')
              raise e

          if rsp is None:
-             raise Exception(f'Error reading file - empty result: {path}')
+             UserMessage(f'Error reading file - empty result: {path}', raise_with=Exception)

          metadata = {}
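Note on the engine_io.py refactor above: the inline bracket-suffix parsing moved into the standalone helpers `_parse_slice_token`, `_parse_slice_spec`, and `_apply_slices`, so a path such as `document.txt[2:5]` reads only the selected lines. A minimal sketch of how they compose, with the helper bodies taken verbatim from the hunk and only the demo values invented:

    # Slice-spec helpers from the hunk above, exercised in isolation.
    # The sample path and lines are illustrative only.

    def _int_or_none(value):
        return int(value) if value != '' else None

    def _parse_slice_token(token):
        # '7' -> 7, '2:5' -> slice(2, 5, None), '::3' -> slice(None, None, 3)
        if ':' not in token:
            return int(token)
        parts = token.split(':')
        if len(parts) == 2:
            start, end = parts
            return slice(_int_or_none(start), _int_or_none(end), None)
        if len(parts) == 3:
            start, end, step = parts
            return slice(_int_or_none(start), _int_or_none(end), _int_or_none(step))
        return None

    def _parse_slice_spec(file_path):
        # 'notes.txt[2:5]' -> ('notes.txt', [slice(2, 5, None)])
        if '[' not in file_path or ']' not in file_path:
            return file_path, None
        path_part, remainder = file_path.split('[', 1)
        slice_section = remainder.split(']', 1)[0]
        slices = []
        for token in slice_section.split(','):
            if token == '':
                continue
            parsed = _parse_slice_token(token)
            if parsed is not None:
                slices.append(parsed)
        return path_part, slices or None

    def _apply_slices(lines, slices_):
        if slices_ is None:
            return lines
        new_content = []
        for slice_item in slices_:
            new_content.extend(lines[slice_item])
        return new_content

    lines = [f'line {i}' for i in range(10)]
    path, slices_ = _parse_slice_spec('notes.txt[2:5]')
    print(path, _apply_slices(lines, slices_))
    # notes.txt ['line 2', 'line 3', 'line 4']

One caveat carried over from the pre-refactor code: an integer token reaches `new_content.extend(lines[i])`, and `extend` iterates the selected string character by character, so only slice tokens like `2:5` select whole lines.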
symai/backend/engines/imagecaptioning/engine_blip2.py

@@ -1,4 +1,3 @@
- from typing import List

  import requests
  import torch
@@ -10,6 +9,7 @@ except ImportError:

  from PIL import Image

+ from ....utils import UserMessage
  from ...base import Engine
  from ...settings import SYMAI_CONFIG

@@ -43,7 +43,7 @@ class Blip2Engine(Engine):

      def forward(self, argument):
          if load_model_and_preprocess is None:
-             raise ImportError('Blip2 is not installed. Please install it with `pip install symbolicai[blip2]`')
+             UserMessage('Blip2 is not installed. Please install it with `pip install symbolicai[blip2]`', raise_with=ImportError)
          if self.model is None:
              self.model, self.vis_processors, self.txt_processors = load_model_and_preprocess(name = self.name_id,
                                                                                               model_type = self.model_id,
@@ -52,7 +52,7 @@ class Blip2Engine(Engine):

          image, prompt = argument.prop.prepared_input
          kwargs = argument.kwargs
-         except_remedy = kwargs['except_remedy'] if 'except_remedy' in kwargs else None
+         except_remedy = kwargs.get('except_remedy')

          if 'http' in image:
              image = Image.open(requests.get(image, stream=True).raw).convert('RGB')
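A pattern recurring throughout these hunks: `print(...)` calls and bare `raise X(...)` statements are routed through `UserMessage` from `symai.utils`, optionally escalating via `raise_with`. The helper's implementation is not part of this diff; a hypothetical minimal version consistent with the call sites might look like:

    # Hypothetical sketch only; the real symai.utils.UserMessage is not shown
    # in this diff. Shape inferred from call sites: UserMessage(msg) notifies,
    # UserMessage(msg, raise_with=SomeError) also raises SomeError(msg).
    import logging

    logger = logging.getLogger('symai')

    def UserMessage(message, raise_with=None):
        logger.warning(message)        # surface the message to the user
        if raise_with is not None:
            raise raise_with(message)  # escalate as the requested exception

Whatever the actual implementation, the effect is that user-facing messaging lives in one place instead of `print` statements scattered across engines.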
symai/backend/engines/imagecaptioning/engine_llavacpp_client.py

@@ -1,15 +1,16 @@
- import logging
- import requests
- import json
  import io
+ import json
+ import logging
+ from pathlib import Path

- from typing import List
- from requests_toolbelt.multipart.encoder import MultipartEncoder
+ import requests
  from PIL.Image import Image
+ from requests_toolbelt.multipart.encoder import MultipartEncoder

+ from ....symbol import Result
+ from ....utils import UserMessage
  from ...base import Engine
  from ...settings import SYMAI_CONFIG
- from ....symbol import Result


  def image_to_byte_array(image: Image, format='PNG') -> bytes:
@@ -18,8 +19,7 @@ def image_to_byte_array(image: Image, format='PNG') -> bytes:
      # image.save expects a file-like as a argument
      image.save(imgByteArr, format=format)
      # Turn the BytesIO object back into a bytes object
-     imgByteArr = imgByteArr.getvalue()
-     return imgByteArr
+     return imgByteArr.getvalue()


  class LLaMAResult(Result):
@@ -74,7 +74,7 @@ class LLaMACppClientEngine(Engine):
              im_bytes = image_to_byte_array(image['content'], format=format_)
          else:
              # Convert image to bytes, open as binary
-             with open(image['content'], 'rb') as f:
+             with Path(image['content']).open('rb') as f:
                  im_bytes = f.read()
          # Create multipart/form-data payload
          payload = MultipartEncoder(
@@ -87,7 +87,7 @@ class LLaMACppClientEngine(Engine):
          # Update the headers for multipart/form-data
          headers = {'Content-Type': payload.content_type}
          api = f'http://{self.host}:{self.port}/llava'
-         except_remedy = kwargs['except_remedy'] if 'except_remedy' in kwargs else None
+         except_remedy = kwargs.get('except_remedy')
          try:
              # use http localhost 8000 to send a request to the server
              rsp = requests.post(api, data=payload, headers=headers, timeout=self.timeout)
@@ -95,7 +95,8 @@ class LLaMACppClientEngine(Engine):
          except Exception as e:
              if except_remedy is None:
                  raise e
-             callback = lambda: requests.post(api, data=payload, headers=headers, timeout=self.timeout)
+             def callback():
+                 return requests.post(api, data=payload, headers=headers, timeout=self.timeout)
              res = except_remedy(self, e, callback, argument)

          metadata = {}
@@ -105,17 +106,15 @@ class LLaMACppClientEngine(Engine):
          output = rsp if isinstance(prompts, list) else rsp[0]
          return output, metadata

-     def prepare(self, argument):
-         if argument.prop.raw_input:
-             if not argument.prop.processed_input:
-                 raise ValueError('Need to provide a prompt instruction to the engine if raw_input is enabled.')
-             argument.prop.prepared_input = argument.prop.processed_input
-             return
-
-         user: str = ""
-         system: str = ""
-         system = f'{system}\n' if system and len(system) > 0 else ''
+     def _handle_raw_input(self, argument) -> bool:
+         if not argument.prop.raw_input:
+             return False
+         if not argument.prop.processed_input:
+             UserMessage('Need to provide a prompt instruction to the engine if raw_input is enabled.', raise_with=ValueError)
+         argument.prop.prepared_input = argument.prop.processed_input
+         return True

+     def _append_context_sections(self, system: str, argument) -> str:
          ref = argument.prop.instance
          static_ctxt, dyn_ctxt = ref.global_context
          if len(static_ctxt) > 0:
@@ -126,36 +125,53 @@ class LLaMACppClientEngine(Engine):

          payload = argument.prop.payload
          if argument.prop.payload:
-             system += f"[ADDITIONAL CONTEXT]\n{str(payload)}\n\n"
+             system += f"[ADDITIONAL CONTEXT]\n{payload!s}\n\n"

-         examples: List[str] = argument.prop.examples
+         examples: list[str] = argument.prop.examples
          if examples and len(examples) > 0:
-             system += f"[EXAMPLES]\n{str(examples)}\n\n"
+             system += f"[EXAMPLES]\n{examples!s}\n\n"
+
+         return system

+     def _build_user_instruction(self, argument) -> str:
+         user = ""
          if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
              val = str(argument.prop.prompt)
-             # in this engine, instructions are considered as user prompts
              user += f"[INSTRUCTION]\n{val}"
+         return user

-         suffix: str = str(argument.prop.processed_input)
-
+     def _extract_system_instructions(self, argument, system: str, suffix: str) -> tuple[str, str]:
          if '[SYSTEM_INSTRUCTION::]: <<<' in suffix and argument.prop.parse_system_instructions:
              parts = suffix.split('\n>>>\n')
-             # first parts are the system instructions
-             c = 0
-             for i, p in enumerate(parts):
-                 if 'SYSTEM_INSTRUCTION' in p:
-                     system += f"{p}\n"
-                     c += 1
+             consumed = 0
+             for part in parts:
+                 if 'SYSTEM_INSTRUCTION' in part:
+                     system += f"{part}\n"
+                     consumed += 1
                  else:
                      break
-             # last part is the user input
-             suffix = '\n>>>\n'.join(parts[c:])
-         user += f"{suffix}"
+             suffix = '\n>>>\n'.join(parts[consumed:])
+         return system, suffix

+     def _append_template_suffix(self, user: str, argument) -> str:
          if argument.prop.template_suffix:
-             user += f"\n[[PLACEHOLDER]]\n{str(argument.prop.template_suffix)}\n\n"
-             user += f"Only generate content for the placeholder `[[PLACEHOLDER]]` following the instructions and context information. Do NOT write `[[PLACEHOLDER]]` or anything else in your output.\n\n"
+             user += f"\n[[PLACEHOLDER]]\n{argument.prop.template_suffix!s}\n\n"
+             user += "Only generate content for the placeholder `[[PLACEHOLDER]]` following the instructions and context information. Do NOT write `[[PLACEHOLDER]]` or anything else in your output.\n\n"
+         return user
+
+     def prepare(self, argument):
+         if self._handle_raw_input(argument):
+             return
+
+         system: str = ""
+         system = f'{system}\n' if system and len(system) > 0 else ''
+         system = self._append_context_sections(system, argument)
+
+         user = self._build_user_instruction(argument)
+         suffix: str = str(argument.prop.processed_input)
+         system, suffix = self._extract_system_instructions(argument, system, suffix)
+         user += f"{suffix}"
+         user = self._append_template_suffix(user, argument)

          user_prompt = { "role": "user", "content": user }
          argument.prop.prepared_input = [
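A small but deliberate change above: the retry callback handed to `except_remedy` becomes a named `def` instead of a `lambda` assignment (clearer tracebacks; PEP 8's E731 discourages binding lambdas to names). The hook's calling convention is visible at the call site `except_remedy(self, e, callback, argument)`; an illustrative remedy follows, with the retry policy invented for the example:

    # Illustrative except_remedy hook; the signature mirrors the call site in
    # the hunk above, but the retry-once policy is an assumption, not symai's.
    def retry_once(engine, error, callback, argument):
        # callback() re-issues the original requests.post with the same payload
        print(f'llava request failed ({error!r}); retrying once')
        return callback()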
symai/backend/engines/index/engine_pinecone.py

@@ -1,17 +1,16 @@
+ import contextlib
  import itertools
  import warnings
- import numpy as np

- warnings.filterwarnings('ignore', module='pinecone')
- try:
-     from pinecone import Pinecone, ServerlessSpec
- except:
-     pass
-
- from ...base import Engine
- from ...settings import SYMAI_CONFIG
  from .... import core_ext
  from ....symbol import Result
+ from ....utils import UserMessage
+ from ...base import Engine
+ from ...settings import SYMAI_CONFIG
+
+ warnings.filterwarnings('ignore', module='pinecone')
+ with contextlib.suppress(BaseException):
+     from pinecone import Pinecone, ServerlessSpec


  def chunks(iterable, batch_size=100):
@@ -47,23 +46,23 @@ class PineconeResult(Result):
              return

          for i, match in enumerate(self.value):
-             match = match.strip()
-             if match.startswith('# ----[FILE_START]') and '# ----[FILE_END]' in match:
-                 m = match.split('[FILE_CONTENT]:')[-1].strip()
+             match_value = match.strip()
+             if match_value.startswith('# ----[FILE_START]') and '# ----[FILE_END]' in match_value:
+                 m = match_value.split('[FILE_CONTENT]:')[-1].strip()
                  splits = m.split('# ----[FILE_END]')
-                 assert len(splits) >= 2, 'Invalid file format: {}'.format(splits)
+                 assert len(splits) >= 2, f'Invalid file format: {splits}'
                  content = splits[0]
                  file_name = ','.join(splits[1:]) # TODO: check why there are multiple file names
                  yield file_name.strip(), content.strip()
              else:
-                 yield i+1, match
+                 yield i+1, match_value

      def __str__(self):
          str_view = ''
          for filename, content in self._unpack_matches():
              # indent each line of the content
-             content = '\n'.join([' ' + line for line in content.split('\n')])
-             str_view += f'* {filename}\n{content}\n\n'
+             content_view = '\n'.join([' ' + line for line in content.split('\n')])
+             str_view += f'* {filename}\n{content_view}\n\n'
          return f'''
  [RESULT]
  {'-=-' * 13}
@@ -138,7 +137,7 @@ class PineconeIndexEngine(Engine):
      def id(self) -> str:
          if SYMAI_CONFIG['INDEXING_ENGINE_API_KEY']:
              if Pinecone is None:
-                 print('Pinecone is not installed. Please install it with `pip install symbolicai[pinecone]`.')
+                 UserMessage('Pinecone is not installed. Please install it with `pip install symbolicai[pinecone]`.')
              return 'index'
          return super().id() # default to unregistered

@@ -150,13 +149,13 @@ class PineconeIndexEngine(Engine):
              self.environment = kwargs['INDEXING_ENGINE_ENVIRONMENT']

      def _configure_index(self, **kwargs):
-         index_name = kwargs['index_name'] if 'index_name' in kwargs else self.index_name
+         index_name = kwargs.get('index_name', self.index_name)

-         del_ = kwargs['index_del'] if 'index_del' in kwargs else False
+         del_ = kwargs.get('index_del', False)
          if self.index is not None and del_:
              self.pinecone.delete_index(index_name)

-         get_ = kwargs['index_get'] if 'index_get' in kwargs else False
+         get_ = kwargs.get('index_get', False)
          if self.index is not None and get_:
              self.index = self.pinecone.Index(name=index_name)

@@ -184,9 +183,9 @@ class PineconeIndexEngine(Engine):
          self._configure_index(**kwargs)

          if operation == 'search':
-             index_top_k = kwargs['index_top_k'] if 'index_top_k' in kwargs else self.index_top_k
-             index_values = kwargs['index_values'] if 'index_values' in kwargs else self.index_values
-             index_metadata = kwargs['index_metadata'] if 'index_metadata' in kwargs else self.index_metadata
+             index_top_k = kwargs.get('index_top_k', self.index_top_k)
+             index_values = kwargs.get('index_values', self.index_values)
+             index_metadata = kwargs.get('index_metadata', self.index_metadata)
              rsp = self._query(embedding, index_top_k, index_values, index_metadata)

          elif operation == 'add':
@@ -197,7 +196,7 @@ class PineconeIndexEngine(Engine):
              self._configure_index(**kwargs)

          else:
-             raise ValueError('Invalid operation')
+             UserMessage('Invalid operation', raise_with=ValueError)

          metadata = {}
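The pinecone import above trades a bare `try/except: pass` for `contextlib.suppress(BaseException)`. Note that `PineconeIndexEngine.id()` later compares `Pinecone is None`, which presumes a `None` binding exists when the import fails; that binding is not visible in this hunk. A self-contained version of the guarded-import pattern pre-binds the sentinels:

    # Guarded optional-import pattern, made self-contained (sketch; whether
    # symbolicai pre-binds these names elsewhere is not visible in this hunk).
    import contextlib

    Pinecone = None
    ServerlessSpec = None
    with contextlib.suppress(BaseException):
        from pinecone import Pinecone, ServerlessSpec  # rebinds the sentinels

    if Pinecone is None:
        print('Pinecone is not installed. Please install it with `pip install symbolicai[pinecone]`.')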
symai/backend/engines/index/engine_vectordb.py

@@ -1,9 +1,10 @@
  import itertools
  from copy import deepcopy
+ from typing import ClassVar

  from ....extended.vectordb import VectorDB
  from ....symbol import Result
- from ....utils import CustomUserWarning
+ from ....utils import UserMessage
  from ...base import Engine
  from ...settings import SYMAI_CONFIG

@@ -40,23 +41,23 @@ class VectorDBResult(Result):
          if not self.value:
              return
          for i, match in enumerate(self.value):
-             match = match.strip()
-             if match.startswith('# ----[FILE_START]') and '# ----[FILE_END]' in match:
-                 m = match.split('[FILE_CONTENT]:')[-1].strip()
+             match_value = match.strip()
+             if match_value.startswith('# ----[FILE_START]') and '# ----[FILE_END]' in match_value:
+                 m = match_value.split('[FILE_CONTENT]:')[-1].strip()
                  splits = m.split('# ----[FILE_END]')
-                 assert len(splits) >= 2, 'Invalid file format: {}'.format(splits)
+                 assert len(splits) >= 2, f'Invalid file format: {splits}'
                  content = splits[0]
                  file_name = ','.join(splits[1:]) # TODO: check why there are multiple file names
                  yield file_name.strip(), content.strip()
              else:
-                 yield i+1, match
+                 yield i+1, match_value

      def __str__(self):
          str_view = ''
          for filename, content in self._unpack_matches():
              # indent each line of the content
-             content = '\n'.join([' ' + line for line in content.split('\n')])
-             str_view += f'* {filename}\n{content}\n\n'
+             content_view = '\n'.join([' ' + line for line in content.split('\n')])
+             str_view += f'* {filename}\n{content_view}\n\n'
          return f'''
  [RESULT]
  {'-=-' * 13}
@@ -85,8 +86,8 @@ class VectorDBIndexEngine(Engine):
      _default_index_dims = 768
      _default_index_top_k = 5
      _default_index_metric = 'cosine'
-     _index_dict = {}
-     _index_storage_file = None
+     _index_dict: ClassVar[dict[str, object]] = {}
+     _index_storage_file: ClassVar[str | None] = None
      def __init__(
          self,
          index_name=_default_index_name,
@@ -95,7 +96,7 @@ class VectorDBIndexEngine(Engine):
          index_metric=_default_index_metric,
          index_dict=_index_dict,
          index_storage_file=_index_storage_file,
-         **kwargs
+         **_kwargs
      ):
          super().__init__()
          self.config = deepcopy(SYMAI_CONFIG)
@@ -131,7 +132,7 @@ class VectorDBIndexEngine(Engine):

          if operation == 'search':
              if isinstance(query, list) and len(query) > 1:
-                 CustomUserWarning('VectorDB indexing engine does not support multiple queries. Pass a single string query instead.', raise_with=ValueError)
+                 UserMessage('VectorDB indexing engine does not support multiple queries. Pass a single string query instead.', raise_with=ValueError)
              query_vector = self.index[index_name].embedding_function([query])[0]
              results = self.index[index_name](vector=query_vector, top_k=top_k, return_similarities=similarities)
              rsp = [{'metadata': {'text': result}} for result in results]
@@ -154,9 +155,9 @@ class VectorDBIndexEngine(Engine):
              elif kwargs.get('purge', maybe_as_prompt == 'purge'):
                  self.purge(index_name)
              else:
-                 CustomUserWarning('Invalid configuration; please use either "load", "save", or "purge".', raise_with=ValueError)
+                 UserMessage('Invalid configuration; please use either "load", "save", or "purge".', raise_with=ValueError)
          else:
-             CustomUserWarning('Invalid operation; please use either "search", "add", or "config".', raise_with=ValueError)
+             UserMessage('Invalid operation; please use either "search", "add", or "config".', raise_with=ValueError)

          metadata = {}
          rsp = VectorDBResult(rsp, query[0], None)
@@ -176,6 +177,7 @@ class VectorDBIndexEngine(Engine):
      def prepare(self, argument):
          assert not argument.prop.processed_input, 'VectorDB indexing engine does not support processed_input.'
          argument.prop.prepared_input = argument.prop.prompt
+         argument.prop.limit = 1

      def load(self, index_name, storage_file, index_dims, top_k, metric):
          self.index[index_name] = VectorDB(
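The `ClassVar` annotations on `_index_dict` and `_index_storage_file` above mark these mutable defaults as intentionally shared class-level state, which is what linters such as ruff (rule RUF012) ask for on unannotated mutable class attributes. A short illustration of the sharing semantics, not taken from the package:

    # Illustration only: a ClassVar dict is shared across instances by design.
    from typing import ClassVar

    class IndexRegistry:
        _index_dict: ClassVar[dict[str, object]] = {}  # one dict for all instances

        def add(self, name: str, db: object) -> None:
            self._index_dict[name] = db

    a, b = IndexRegistry(), IndexRegistry()
    a.add('default', object())
    assert 'default' in b._index_dict  # state is visible through every instance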