symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. symai/__init__.py +96 -64
  2. symai/backend/base.py +93 -80
  3. symai/backend/engines/drawing/engine_bfl.py +12 -11
  4. symai/backend/engines/drawing/engine_gpt_image.py +108 -87
  5. symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
  6. symai/backend/engines/embedding/engine_openai.py +3 -5
  7. symai/backend/engines/execute/engine_python.py +6 -5
  8. symai/backend/engines/files/engine_io.py +74 -67
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
  11. symai/backend/engines/index/engine_pinecone.py +23 -24
  12. symai/backend/engines/index/engine_vectordb.py +16 -14
  13. symai/backend/engines/lean/engine_lean4.py +38 -34
  14. symai/backend/engines/neurosymbolic/__init__.py +41 -13
  15. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
  17. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
  18. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
  19. symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
  20. symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
  21. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
  22. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
  23. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
  24. symai/backend/engines/ocr/engine_apilayer.py +6 -8
  25. symai/backend/engines/output/engine_stdout.py +1 -4
  26. symai/backend/engines/search/engine_openai.py +7 -7
  27. symai/backend/engines/search/engine_perplexity.py +5 -5
  28. symai/backend/engines/search/engine_serpapi.py +12 -14
  29. symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
  30. symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
  31. symai/backend/engines/text_to_speech/engine_openai.py +5 -7
  32. symai/backend/engines/text_vision/engine_clip.py +7 -11
  33. symai/backend/engines/userinput/engine_console.py +3 -3
  34. symai/backend/engines/webscraping/engine_requests.py +81 -48
  35. symai/backend/mixin/__init__.py +13 -0
  36. symai/backend/mixin/anthropic.py +4 -2
  37. symai/backend/mixin/deepseek.py +2 -0
  38. symai/backend/mixin/google.py +2 -0
  39. symai/backend/mixin/openai.py +11 -3
  40. symai/backend/settings.py +83 -16
  41. symai/chat.py +101 -78
  42. symai/collect/__init__.py +7 -1
  43. symai/collect/dynamic.py +77 -69
  44. symai/collect/pipeline.py +35 -27
  45. symai/collect/stats.py +75 -63
  46. symai/components.py +198 -169
  47. symai/constraints.py +15 -12
  48. symai/core.py +698 -359
  49. symai/core_ext.py +32 -34
  50. symai/endpoints/api.py +80 -73
  51. symai/extended/.DS_Store +0 -0
  52. symai/extended/__init__.py +46 -12
  53. symai/extended/api_builder.py +11 -8
  54. symai/extended/arxiv_pdf_parser.py +13 -12
  55. symai/extended/bibtex_parser.py +2 -3
  56. symai/extended/conversation.py +101 -90
  57. symai/extended/document.py +17 -10
  58. symai/extended/file_merger.py +18 -13
  59. symai/extended/graph.py +18 -13
  60. symai/extended/html_style_template.py +2 -4
  61. symai/extended/interfaces/blip_2.py +1 -2
  62. symai/extended/interfaces/clip.py +1 -2
  63. symai/extended/interfaces/console.py +7 -1
  64. symai/extended/interfaces/dall_e.py +1 -1
  65. symai/extended/interfaces/flux.py +1 -1
  66. symai/extended/interfaces/gpt_image.py +1 -1
  67. symai/extended/interfaces/input.py +1 -1
  68. symai/extended/interfaces/llava.py +0 -1
  69. symai/extended/interfaces/naive_vectordb.py +7 -8
  70. symai/extended/interfaces/naive_webscraping.py +1 -1
  71. symai/extended/interfaces/ocr.py +1 -1
  72. symai/extended/interfaces/pinecone.py +6 -5
  73. symai/extended/interfaces/serpapi.py +1 -1
  74. symai/extended/interfaces/terminal.py +2 -3
  75. symai/extended/interfaces/tts.py +1 -1
  76. symai/extended/interfaces/whisper.py +1 -1
  77. symai/extended/interfaces/wolframalpha.py +1 -1
  78. symai/extended/metrics/__init__.py +11 -1
  79. symai/extended/metrics/similarity.py +11 -13
  80. symai/extended/os_command.py +17 -16
  81. symai/extended/packages/__init__.py +29 -3
  82. symai/extended/packages/symdev.py +19 -16
  83. symai/extended/packages/sympkg.py +12 -9
  84. symai/extended/packages/symrun.py +21 -19
  85. symai/extended/repo_cloner.py +11 -10
  86. symai/extended/seo_query_optimizer.py +1 -2
  87. symai/extended/solver.py +20 -23
  88. symai/extended/summarizer.py +4 -3
  89. symai/extended/taypan_interpreter.py +10 -12
  90. symai/extended/vectordb.py +99 -82
  91. symai/formatter/__init__.py +9 -1
  92. symai/formatter/formatter.py +12 -16
  93. symai/formatter/regex.py +62 -63
  94. symai/functional.py +176 -122
  95. symai/imports.py +136 -127
  96. symai/interfaces.py +56 -27
  97. symai/memory.py +14 -13
  98. symai/misc/console.py +49 -39
  99. symai/misc/loader.py +5 -3
  100. symai/models/__init__.py +17 -1
  101. symai/models/base.py +269 -181
  102. symai/models/errors.py +0 -1
  103. symai/ops/__init__.py +32 -22
  104. symai/ops/measures.py +11 -15
  105. symai/ops/primitives.py +348 -228
  106. symai/post_processors.py +32 -28
  107. symai/pre_processors.py +39 -41
  108. symai/processor.py +6 -4
  109. symai/prompts.py +59 -45
  110. symai/server/huggingface_server.py +23 -20
  111. symai/server/llama_cpp_server.py +7 -5
  112. symai/shell.py +3 -4
  113. symai/shellsv.py +499 -375
  114. symai/strategy.py +517 -287
  115. symai/symbol.py +111 -116
  116. symai/utils.py +42 -36
  117. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
  118. symbolicai-1.0.0.dist-info/RECORD +163 -0
  119. symbolicai-0.20.2.dist-info/RECORD +0 -162
  120. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
  121. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
  122. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
  123. {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
symai/extended/arxiv_pdf_parser.py CHANGED
@@ -1,13 +1,14 @@
1
- import os
2
1
  import re
3
2
  import shutil
4
- import requests
5
-
6
3
  from concurrent.futures import ThreadPoolExecutor, as_completed
4
+ from pathlib import Path
5
+
6
+ import requests
7
7
 
8
+ from ..backend.settings import HOME_PATH
8
9
  from ..symbol import Expression, Symbol
10
+ from ..utils import UserMessage
9
11
  from .file_merger import FileMerger
10
- from ..backend.settings import HOME_PATH
11
12
 
12
13
 
13
14
  class ArxivPdfParser(Expression):
@@ -21,11 +22,11 @@ class ArxivPdfParser(Expression):
21
22
  urls = re.findall(self.url_pattern, str(data))
22
23
 
23
24
  # Convert all urls to pdf urls
24
- pdf_urls = [f"https://arxiv.org/pdf/" + (f"{url.split('/')[-1]}.pdf" if 'pdf' not in url else {url.split('/')[-1]}) for url in urls]
25
+ pdf_urls = ["https://arxiv.org/pdf/" + (f"{url.split('/')[-1]}.pdf" if 'pdf' not in url else {url.split('/')[-1]}) for url in urls]
25
26
 
26
27
  # Create temporary folder in the home directory
27
- output_path = os.path.join(HOME_PATH, "temp/downloads")
28
- os.makedirs(output_path, exist_ok=True)
28
+ output_path = HOME_PATH / "temp" / "downloads"
29
+ output_path.mkdir(parents=True, exist_ok=True)
29
30
 
30
31
  pdf_files = []
31
32
  with ThreadPoolExecutor() as executor:
@@ -36,13 +37,13 @@ class ArxivPdfParser(Expression):
36
37
  try:
37
38
  pdf_files.append(future.result())
38
39
  except Exception as exc:
39
- print('%r generated an exception: %s' % (url, exc))
40
+ UserMessage(f"{url!r} generated an exception: {exc}")
40
41
 
41
42
  if len(pdf_files) == 0:
42
43
  return None
43
44
 
44
45
  # Merge all pdfs into one file
45
- merged_file = self.merger(output_path, **kwargs)
46
+ merged_file = self.merger(str(output_path), **kwargs)
46
47
 
47
48
  # Return the merged file as a Symbol
48
49
  return_file = self._to_symbol(merged_file)
@@ -55,7 +56,7 @@ class ArxivPdfParser(Expression):
55
56
  def download_pdf(self, url, output_path):
56
57
  # Download pdfs
57
58
  response = requests.get(url)
58
- file = os.path.join(output_path, f'{url.split("/")[-1]}')
59
- with open(file, 'wb') as f:
59
+ file_path = Path(output_path) / f'{url.split("/")[-1]}'
60
+ with file_path.open('wb') as f:
60
61
  f.write(response.content)
61
- return file
62
+ return str(file_path)
symai/extended/bibtex_parser.py CHANGED
@@ -1,8 +1,7 @@
1
1
  from .. import core
2
+ from ..post_processors import CodeExtractPostProcessor
2
3
  from ..pre_processors import PreProcessor
3
4
  from ..symbol import Expression, Symbol
4
- from ..post_processors import CodeExtractPostProcessor
5
-
6
5
 
7
6
  BIB_DESCRIPTION = """[Description]
8
7
  You take in a text with references to papers and return a list of biblatex entries.
@@ -69,7 +68,7 @@ Multimodal Few-Shot Learning with Frozen Language Models Maria Tsimpoukelli
69
68
 
70
69
  class BibTexPreProcessor(PreProcessor):
71
70
  def __call__(self, argument):
72
- return '>>>\n{}\n\n<<<\n'.format(str(argument.args[0]))
71
+ return f'>>>\n{argument.args[0]!s}\n\n<<<\n'
73
72
 
74
73
 
75
74
  class BibTexParser(Expression):
symai/extended/conversation.py CHANGED
@@ -1,21 +1,23 @@
1
- import os
2
1
  import pickle
2
+ from collections.abc import Callable
3
3
  from datetime import datetime
4
4
  from pathlib import Path
5
- from typing import Any, Callable, List, Optional
5
+ from typing import Any
6
6
 
7
- from ..components import FileReader, Indexer
7
+ from ..components import FileReader
8
8
  from ..formatter import TextContainerFormatter
9
9
  from ..interfaces import Interface
10
10
  from ..memory import SlidingWindowStringConcatMemory
11
11
  from ..symbol import Symbol
12
- from ..utils import CustomUserWarning, deprecated
12
+ from ..utils import UserMessage, deprecated
13
13
  from .document import DocumentRetriever
14
14
  from .seo_query_optimizer import SEOQueryOptimizer
15
15
 
16
+ _DEFAULT_TEXT_CONTAINER_FORMATTER = TextContainerFormatter(text_split=4)
17
+
16
18
 
17
19
  class CodeFormatter:
18
- def __call__(self, value: str, *args: Any, **kwds: Any) -> Any:
20
+ def __call__(self, value: str, *_args: Any, **_kwds: Any) -> Any:
19
21
  # extract code from chat conversations or ```<language>\n{code}\n``` blocks
20
22
  return Symbol(value).extract('Only extract code without ``` block markers or chat conversations')
21
23
 
@@ -23,10 +25,10 @@ class CodeFormatter:
23
25
  class Conversation(SlidingWindowStringConcatMemory):
24
26
  def __init__(
25
27
  self,
26
- init: Optional[str] = None,
27
- file_link: Optional[List[str]] = None,
28
- url_link: Optional[List[str]] = None,
29
- index_name: Optional[str] = None,
28
+ init: str | None = None,
29
+ file_link: list[str] | None = None,
30
+ url_link: list[str] | None = None,
31
+ index_name: str | None = None,
30
32
  auto_print: bool = True,
31
33
  truncation_percentage: float = 0.8,
32
34
  truncation_type: str = 'head',
@@ -61,7 +63,7 @@ class Conversation(SlidingWindowStringConcatMemory):
61
63
  self.indexer = None
62
64
  self.index = None
63
65
  if index_name is not None:
64
- CustomUserWarning("Index not supported for conversation class.", raise_with=NotImplementedError)
66
+ UserMessage("Index not supported for conversation class.", raise_with=NotImplementedError)
65
67
 
66
68
  def __getstate__(self):
67
69
  state = super().__getstate__().copy()
@@ -76,41 +78,42 @@ class Conversation(SlidingWindowStringConcatMemory):
76
78
  self.seo_opt = SEOQueryOptimizer()
77
79
  self.reader = FileReader()
78
80
  if self.index_name is not None:
79
- CustomUserWarning("Index not supported for conversation class.", raise_with=NotImplementedError)
81
+ UserMessage("Index not supported for conversation class.", raise_with=NotImplementedError)
80
82
 
81
- def store_system_message(self, message: str, *args, **kwargs):
82
- val = f"[SYSTEM_INSTRUCTION::]: <<<\n{str(message)}\n>>>\n"
83
+ def store_system_message(self, message: str, *_args, **_kwargs):
84
+ val = f"[SYSTEM_INSTRUCTION::]: <<<\n{message!s}\n>>>\n"
83
85
  self.store(val)
84
86
 
85
- def store_file(self, file_path: str, *args, **kwargs):
87
+ def store_file(self, file_path: str, *_args, **_kwargs):
86
88
  content = self.reader(file_path)
87
- val = f"[DATA::{file_path}]: <<<\n{str(content)}\n>>>\n"
89
+ val = f"[DATA::{file_path}]: <<<\n{content!s}\n>>>\n"
88
90
  self.store(val)
89
91
 
90
- def store_url(self, url: str, *args, **kwargs):
92
+ def store_url(self, url: str, *_args, **_kwargs):
91
93
  content = self.scraper(url)
92
- val = f"[DATA::{url}]: <<<\n{str(content)}\n>>>\n"
94
+ val = f"[DATA::{url}]: <<<\n{content!s}\n>>>\n"
93
95
  self.store(val)
94
96
 
95
97
  @staticmethod
96
98
  def save_conversation_state(conversation: "Conversation", file_path: str) -> None:
97
99
  # Check if path exists and create it if it doesn't
98
- dir_path = os.path.dirname(file_path)
99
- os.makedirs(dir_path, exist_ok=True)
100
+ path_obj = Path(file_path)
101
+ path_obj.parent.mkdir(parents=True, exist_ok=True)
100
102
  # Save the conversation object as a pickle file
101
- with open(file_path, 'wb') as handle:
103
+ with path_obj.open('wb') as handle:
102
104
  pickle.dump(conversation, handle, protocol=pickle.HIGHEST_PROTOCOL)
103
105
 
104
106
  def load_conversation_state(self, path: str) -> "Conversation":
105
107
  # Check if the file exists and it's not empty
106
- if os.path.exists(path):
107
- if os.path.getsize(path) <= 0:
108
- raise Exception("File is empty.")
108
+ path_obj = Path(path)
109
+ if path_obj.exists():
110
+ if path_obj.stat().st_size <= 0:
111
+ UserMessage("File is empty.", raise_with=Exception)
109
112
  # Load the conversation object from a pickle file
110
- with open(path, 'rb') as handle:
113
+ with path_obj.open('rb') as handle:
111
114
  conversation_state = pickle.load(handle)
112
115
  else:
113
- raise Exception("File does not exist or is empty.")
116
+ UserMessage("File does not exist or is empty.", raise_with=Exception)
114
117
 
115
118
  # Create a new instance of the `Conversation` class and restore
116
119
  # the state from the saved conversation
@@ -127,10 +130,10 @@ class Conversation(SlidingWindowStringConcatMemory):
127
130
  self.seo_opt = SEOQueryOptimizer()
128
131
  self.reader = FileReader()
129
132
  if self.index_name is not None:
130
- CustomUserWarning("Index not supported for conversation class.", raise_with=NotImplementedError)
133
+ UserMessage("Index not supported for conversation class.", raise_with=NotImplementedError)
131
134
  return self
132
135
 
133
- def commit(self, target_file: str = None, formatter: Optional[Callable] = None):
136
+ def commit(self, target_file: str | None = None, formatter: Callable | None = None):
134
137
  if target_file and isinstance(target_file, str):
135
138
  file_link = target_file
136
139
  else:
@@ -141,7 +144,7 @@ class Conversation(SlidingWindowStringConcatMemory):
141
144
  file_link = file_link[0]
142
145
  else:
143
146
  file_link = None # cannot commit to multiple files
144
- raise Exception('Cannot commit to multiple files.')
147
+ UserMessage('Cannot commit to multiple files.', raise_with=Exception)
145
148
  if file_link:
146
149
  # if file extension is .py, then format code
147
150
  format_ = formatter
@@ -150,10 +153,10 @@ class Conversation(SlidingWindowStringConcatMemory):
150
153
  if formatter:
151
154
  val = formatter(val)
152
155
  # if file does not exist, create it
153
- with open(file_link, 'w') as file:
156
+ with Path(file_link).open('w') as file:
154
157
  file.write(str(val))
155
158
  else:
156
- raise Exception('File link is not set or a set of files.')
159
+ UserMessage('File link is not set or a set of files.', raise_with=Exception)
157
160
 
158
161
  def save(self, path: str, replace: bool = False) -> Symbol:
159
162
  return Symbol(self._memory).save(path, replace=replace)
@@ -161,64 +164,78 @@ class Conversation(SlidingWindowStringConcatMemory):
161
164
  def build_tag(self, tag: str, query: str) -> str:
162
165
  # get timestamp in string format
163
166
  timestamp = datetime.now().strftime("%d/%m/%Y %H:%M:%S:%f")
164
- return str(f"[{tag}{timestamp}]: <<<\n{str(query)}\n>>>\n")
167
+ return str(f"[{tag}{timestamp}]: <<<\n{query!s}\n>>>\n")
165
168
 
166
169
  def forward(self, query: str, *args, **kwargs):
167
- # dynamic takes precedence over static
168
- dynamic_truncation_percentage = kwargs.get('truncation_percentage', self.truncation_percentage)
169
- dynamic_truncation_type = kwargs.get('truncation_type', self.truncation_type)
170
- kwargs = {**kwargs, 'truncation_percentage': dynamic_truncation_percentage, 'truncation_type': dynamic_truncation_type}
171
-
170
+ kwargs = self._apply_truncation_overrides(kwargs)
172
171
  query = self._to_symbol(query)
173
- memory = None
174
-
175
- if self.index is not None:
176
- memory_split = self._memory.split(self.marker)
177
- memory_shards = []
178
- for ms in memory_split:
179
- if ms.strip() == '':
180
- continue
181
- memory_shards.append(ms)
182
-
183
- length_memory_shards = len(memory_shards)
184
- if length_memory_shards <= 3:
185
- memory_shards = memory_shards
186
- elif length_memory_shards <= 5:
187
- memory_shards = memory_shards[:2] + memory_shards[-(length_memory_shards-2):]
188
- else:
189
- memory_shards = memory_shards[:2] + memory_shards[-3:]
172
+ memory = self._retrieve_index_memory(query, args, kwargs)
173
+ payload = self._build_payload(kwargs, memory)
174
+ res = self.recall(query, *args, payload=payload, **kwargs)
175
+
176
+ # if user is requesting to preview the response, then return only the preview result
177
+ if kwargs.get('preview'):
178
+ if self.auto_print:
179
+ UserMessage(str(res), style="text")
180
+ return res
181
+
182
+ ### --- asses memory update --- ###
183
+
184
+ self._append_interaction_to_memory(query, res)
185
+
186
+ # WARN: DO NOT PROCESS THE RES BY REMOVING `<<<` AND `>>>` TAGS
187
+
188
+ if self.auto_print:
189
+ UserMessage(str(res), style="text")
190
+ return res
190
191
 
191
- search_query = query | '\n' | '\n'.join(memory_shards)
192
- if kwargs.get('use_seo_opt'):
193
- search_query = self.seo_opt(f'[Query]:' | search_query)
194
- memory = self.index(search_query, *args, **kwargs)
192
+ def _apply_truncation_overrides(self, kwargs: dict[str, Any]) -> dict[str, Any]:
193
+ dynamic_truncation_percentage = kwargs.get('truncation_percentage', self.truncation_percentage)
194
+ dynamic_truncation_type = kwargs.get('truncation_type', self.truncation_type)
195
+ return {
196
+ **kwargs,
197
+ 'truncation_percentage': dynamic_truncation_percentage,
198
+ 'truncation_type': dynamic_truncation_type,
199
+ }
200
+
201
+ def _retrieve_index_memory(self, query: Symbol, args: tuple[Any, ...], kwargs: dict[str, Any]):
202
+ if self.index is None:
203
+ return None
204
+
205
+ memory_split = self._memory.split(self.marker)
206
+ memory_shards = []
207
+ for shard in memory_split:
208
+ if shard.strip() == '':
209
+ continue
210
+ memory_shards.append(shard)
211
+
212
+ length_memory_shards = len(memory_shards)
213
+ if length_memory_shards > 5:
214
+ memory_shards = memory_shards[:2] + memory_shards[-3:]
215
+ elif length_memory_shards > 3:
216
+ retained = memory_shards[-(length_memory_shards - 2):]
217
+ memory_shards = memory_shards[:2] + retained
218
+
219
+ search_query = query | '\n' | '\n'.join(memory_shards)
220
+ if kwargs.get('use_seo_opt'):
221
+ search_query = self.seo_opt('[Query]:' | search_query)
222
+ memory = self.index(search_query, *args, **kwargs)
195
223
 
196
- if 'raw_result' in kwargs:
197
- print(memory)
224
+ if 'raw_result' in kwargs:
225
+ UserMessage(str(memory), style="text")
226
+ return memory
198
227
 
228
+ def _build_payload(self, kwargs: dict[str, Any], memory) -> str:
199
229
  payload = ''
200
- # if payload is set, then add it to the memory
201
230
  if 'payload' in kwargs:
202
- payload = f"[Conversation Payload]:\n{kwargs.pop('payload')}\n"
231
+ payload = f"[Conversation Payload]:\n{kwargs.pop('payload')}\n"
203
232
 
204
233
  index_memory = ''
205
- # if index is set, then add it to the memory
206
234
  if memory:
207
235
  index_memory = f'[Index Retrieval]:\n{str(memory)[:1500]}\n'
236
+ return f'{index_memory}{payload}'
208
237
 
209
- payload = f'{index_memory}{payload}'
210
- # perform a recall function using the query
211
- res = self.recall(query, *args, payload=payload, **kwargs)
212
-
213
- # if user is requesting to preview the response, then return only the preview result
214
- if 'preview' in kwargs and kwargs['preview']:
215
- if self.auto_print:
216
- print(res)
217
- return res
218
-
219
- ### --- asses memory update --- ###
220
-
221
- # append the bot prompt to the memory
238
+ def _append_interaction_to_memory(self, query: Symbol, res: Symbol) -> None:
222
239
  prompt = self.build_tag(self.user_tag, query)
223
240
  self.store(prompt)
224
241
 
@@ -226,12 +243,6 @@ class Conversation(SlidingWindowStringConcatMemory):
226
243
  val = self.build_tag(self.bot_tag, res)
227
244
  self.store(val)
228
245
 
229
- # WARN: DO NOT PROCESS THE RES BY REMOVING `<<<` AND `>>>` TAGS
230
-
231
- if self.auto_print:
232
- print(res)
233
- return res
234
-
235
246
 
236
247
  RETRIEVAL_CONTEXT = """[Description]
237
248
  This is a conversation between a retrieval augmented indexing program and a user. The system combines document retrieval with conversational AI to provide context-aware responses. It can:
@@ -269,23 +280,23 @@ Responses should be:
269
280
  class RetrievalAugmentedConversation(Conversation):
270
281
  def __init__(
271
282
  self,
272
- folder_path: Optional[str] = None,
283
+ folder_path: str | None = None,
273
284
  *,
274
- index_name: Optional[str] = None,
275
- max_depth: Optional[int] = 0,
285
+ index_name: str | None = None,
286
+ max_depth: int | None = 0,
276
287
  auto_print: bool = True,
277
288
  top_k: int = 5,
278
- formatter: Callable = TextContainerFormatter(text_split=4),
289
+ formatter: Callable = _DEFAULT_TEXT_CONTAINER_FORMATTER,
279
290
  overwrite: bool = False,
280
291
  truncation_percentage: float = 0.8,
281
292
  truncation_type: str = 'head',
282
293
  with_metadata: bool = False,
283
- raw_result: Optional[bool] = False,
284
- new_dim: Optional[int] = None,
294
+ raw_result: bool | None = False,
295
+ new_dim: int | None = None,
285
296
  **kwargs
286
297
  ):
287
298
 
288
- super().__init__(auto_print=auto_print, truncation_percentage=truncation_percentage, truncation_type=truncation_type, with_metadata=with_metadata, *kwargs)
299
+ super().__init__(auto_print=auto_print, truncation_percentage=truncation_percentage, truncation_type=truncation_type, with_metadata=with_metadata, **kwargs)
289
300
 
290
301
  self.retriever = DocumentRetriever(
291
302
  source=folder_path,
@@ -323,7 +334,7 @@ class RetrievalAugmentedConversation(Conversation):
323
334
  memory = self.index(query, *args, **kwargs)
324
335
 
325
336
  if 'raw_result' in kwargs:
326
- print(memory)
337
+ UserMessage(str(memory), style="text")
327
338
  return memory
328
339
 
329
340
  prompt = self.build_tag(self.user_tag, query)
@@ -338,5 +349,5 @@ class RetrievalAugmentedConversation(Conversation):
338
349
  self.store(val)
339
350
 
340
351
  if self.auto_print:
341
- print(res)
352
+ UserMessage(str(res), style="text")
342
353
  return res
symai/extended/document.py CHANGED
@@ -1,25 +1,31 @@
1
- import os
1
+ from collections.abc import Callable
2
2
  from pathlib import Path
3
- from typing import Callable, List, Optional, Union
3
+ from typing import TYPE_CHECKING, Union
4
4
 
5
5
  from ..components import FileReader, Indexer
6
6
  from ..formatter import ParagraphFormatter
7
7
  from ..symbol import Expression, Symbol
8
+ from ..utils import UserMessage
9
+
10
+ if TYPE_CHECKING:
11
+ from ..backend.engines.files.engine_io import TextContainer
12
+
13
+ _DEFAULT_PARAGRAPH_FORMATTER = ParagraphFormatter()
8
14
 
9
15
 
10
16
  class DocumentRetriever(Expression):
11
17
  def __init__(
12
18
  self,
13
- source: Optional[str] = None,
19
+ source: str | None = None,
14
20
  *,
15
21
  index_name: str = Indexer.DEFAULT,
16
22
  top_k: int = 5,
17
23
  max_depth: int = 1,
18
- formatter: Callable = ParagraphFormatter(),
24
+ formatter: Callable = _DEFAULT_PARAGRAPH_FORMATTER,
19
25
  overwrite: bool = False,
20
26
  with_metadata: bool = False,
21
- raw_result: Optional[bool] = False,
22
- new_dim: Optional[int] = None,
27
+ raw_result: bool | None = False,
28
+ new_dim: int | None = None,
23
29
  **kwargs
24
30
  ):
25
31
  super().__init__(**kwargs)
@@ -42,14 +48,14 @@ class DocumentRetriever(Expression):
42
48
  def forward(
43
49
  self,
44
50
  query: Symbol,
45
- raw_result: Optional[bool] = False,
51
+ raw_result: bool | None = False,
46
52
  ) -> Symbol:
47
53
  return self.index(
48
54
  query,
49
55
  raw_result=raw_result,
50
56
  )
51
57
 
52
- def insert(self, source: Union[str, Path], **kwargs):
58
+ def insert(self, source: str | Path, **kwargs):
53
59
  # dynamically insert data into the index given a session
54
60
  # the data can be:
55
61
  # - a string (e.g. something that the user wants to insert)
@@ -60,7 +66,7 @@ class DocumentRetriever(Expression):
60
66
  self.add(text, index_name=self.indexer.index_name, **kwargs)
61
67
  self.config(None, save=True, index_name=self.indexer.index_name, **kwargs)
62
68
 
63
- def parse_source(self, source: str, with_metadata: bool, max_depth: int, **kwargs) -> List[Union[str, 'TextContainer']]:
69
+ def parse_source(self, source: str, with_metadata: bool, max_depth: int, **kwargs) -> list[Union[str, 'TextContainer']]:
64
70
  maybe_path = Path(source)
65
71
  if isinstance(source, str) and not (maybe_path.is_file() or maybe_path.is_dir()):
66
72
  return Symbol(source).zip(new_dim=self.new_dim)
@@ -69,4 +75,5 @@ class DocumentRetriever(Expression):
69
75
  return self.reader(files, with_metadata=with_metadata, **kwargs)
70
76
  if maybe_path.is_file():
71
77
  return self.reader(source, with_metadata=with_metadata, **kwargs)
72
- raise ValueError(f"Invalid source: {source}; must be a file, directory, or string")
78
+ UserMessage(f"Invalid source: {source}; must be a file, directory, or string", raise_with=ValueError)
79
+ return []
symai/extended/file_merger.py CHANGED
@@ -1,10 +1,10 @@
1
1
  import os
2
+ from pathlib import Path
2
3
 
3
4
  from tqdm import tqdm
4
- from typing import List
5
5
 
6
- from ..symbol import Expression, Symbol
7
6
  from ..components import FileReader
7
+ from ..symbol import Expression, Symbol
8
8
 
9
9
 
10
10
  class FileMerger(Expression):
@@ -12,8 +12,12 @@ class FileMerger(Expression):
12
12
  Class to merge contents of multiple files into one, specified by their file endings and root path.
13
13
  Files specified in the exclude list will not be included.
14
14
  """
15
- def __init__(self, file_endings: List[str] = ['.py', '.md', '.txt', '.sh', '.pdf', '.json', '.yaml', '.java', '.cpp', '.hpp', '.c', '.h', '.js', '.css', '.html', '.xml', '.csv', '.tsv', '.yml', '.rst', '.ipynb', '.tex', '.bib'],
16
- file_excludes: List[str] = ['__init__.py', '__pycache__', 'LICENSE', 'requirements.txt', 'environment.yaml', '.git'], **kwargs):
15
+ def __init__(self, file_endings: list[str] | None = None,
16
+ file_excludes: list[str] | None = None, **kwargs):
17
+ if file_excludes is None:
18
+ file_excludes = ['__init__.py', '__pycache__', 'LICENSE', 'requirements.txt', 'environment.yaml', '.git']
19
+ if file_endings is None:
20
+ file_endings = ['.py', '.md', '.txt', '.sh', '.pdf', '.json', '.yaml', '.java', '.cpp', '.hpp', '.c', '.h', '.js', '.css', '.html', '.xml', '.csv', '.tsv', '.yml', '.rst', '.ipynb', '.tex', '.bib']
17
21
  super().__init__(**kwargs)
18
22
  self.file_endings = file_endings
19
23
  self.file_excludes = file_excludes
@@ -31,29 +35,30 @@ class FileMerger(Expression):
31
35
 
32
36
  # Implement recursive file search
33
37
  # use tqdm for progress bar and description
34
- tqdm_desc = f"Reading file: ..."
38
+ tqdm_desc = "Reading file: ..."
35
39
  # use os.walk to recursively search for files in the root path
36
40
  progress = tqdm(os.walk(root_path), desc=tqdm_desc)
37
41
 
38
- for root, dirs, files in progress:
42
+ for root, _dirs, files in progress:
39
43
  for file in files:
40
- file_path = os.path.join(root, file)
44
+ file_path = Path(root) / file
45
+ file_path_str = file_path.as_posix()
41
46
  # Exclude files with the specified names in the path
42
- if any(exclude in file_path for exclude in self.file_excludes):
47
+ if any(exclude in file_path_str for exclude in self.file_excludes):
43
48
  continue
44
49
 
45
50
  # Look only for files with the specified endings
46
51
  if file.endswith(tuple(self.file_endings)):
47
52
  # Read in the file using the FileReader
48
- file_content = self.reader(file_path, **kwargs).value
53
+ file_content = self.reader(file_path_str, **kwargs).value
49
54
 
50
55
  # escape file name spaces
51
- file_path = file_path.replace(" ", "\\ ")
56
+ file_path_escaped = file_path_str.replace(" ", "\\ ")
52
57
 
53
58
  # Append start and end markers for each file
54
- file_content = f"# ----[FILE_START]<PART1/1>{file_path}[FILE_CONTENT]:\n" + \
59
+ file_content = f"# ----[FILE_START]<PART1/1>{file_path_escaped}[FILE_CONTENT]:\n" + \
55
60
  file_content + \
56
- f"\n# ----[FILE_END]{file_path}\n"
61
+ f"\n# ----[FILE_END]{file_path_escaped}\n"
57
62
 
58
63
  # Merge the file contents
59
64
  merged_file += file_content
@@ -63,4 +68,4 @@ class FileMerger(Expression):
63
68
  progress.set_description(tqdm_desc)
64
69
 
65
70
  # Return the merged file as a Symbol
66
- return self._to_symbol(merged_file)
71
+ return self._to_symbol(merged_file)
symai/extended/graph.py CHANGED
@@ -1,5 +1,5 @@
1
+ from collections.abc import Callable
1
2
  from multiprocessing import Pool
2
- from typing import Callable
3
3
 
4
4
  from .. import core
5
5
  from ..formatter import SentenceFormatter
@@ -7,6 +7,9 @@ from ..post_processors import StripPostProcessor
7
7
  from ..pre_processors import PreProcessor
8
8
  from ..prompts import Prompt
9
9
  from ..symbol import Expression, Symbol
10
+ from ..utils import UserMessage
11
+
12
+ _DEFAULT_SENTENCE_FORMATTER = SentenceFormatter()
10
13
 
11
14
  GRAPH_DESCRIPTION = """[Description]
12
15
  Build source-target relationship pairs for named entities based for the [DATA] section. The [DATA] section contains one sentence.
@@ -18,7 +21,7 @@ If more than one entity pair is extracted from the same sentence, then the CSV f
18
21
 
19
22
  class GraphPreProcessor(PreProcessor):
20
23
  def __call__(self, argument):
21
- return '$> {} =>'.format(str(argument.args[0]))
24
+ return f'$> {argument.args[0]!s} =>'
22
25
 
23
26
 
24
27
  class Graph(Expression):
@@ -26,14 +29,14 @@ class Graph(Expression):
26
29
  def static_context(self) -> str:
27
30
  return GRAPH_DESCRIPTION
28
31
 
29
- def __init__(self, formatter: Callable = SentenceFormatter(), n_workers: int = 1, verbose: bool = False, **kwargs):
32
+ def __init__(self, formatter: Callable = _DEFAULT_SENTENCE_FORMATTER, n_workers: int = 1, verbose: bool = False, **kwargs):
30
33
  super().__init__(**kwargs)
31
34
  self.formatter = formatter
32
35
  self.n_workers = n_workers
33
36
  self.sym_return_type = Graph
34
37
  self.verbose = verbose
35
38
 
36
- def process_symbol(self, s, *args, **kwargs):
39
+ def process_symbol(self, s, *_args, **kwargs):
37
40
  res = ''
38
41
 
39
42
  @core.few_shot(prompt="Extract relationships between entities:\n",
@@ -50,26 +53,28 @@ class Graph(Expression):
50
53
  pass
51
54
 
52
55
  if len(str(s)) > 0:
53
- if self.verbose: print(s)
56
+ if self.verbose:
57
+ UserMessage(str(s))
54
58
  r = _func(self, s)
55
59
  rec = str(r)
56
60
  lines = rec.split('\n')
57
- for l in lines:
58
- l = l.strip()
59
- if len(l) > 0:
60
- csv = l.split(',')
61
+ for line in lines:
62
+ stripped_line = line.strip()
63
+ if len(stripped_line) > 0:
64
+ csv = stripped_line.split(',')
61
65
  try:
62
66
  if len(csv) == 3 and \
63
67
  csv[0].strip() != '' and \
64
68
  csv[1].strip() != '' and \
65
69
  int(csv[2].strip()) > 0:
66
- res += l + '\n'
70
+ res += stripped_line + '\n'
67
71
  except Exception as e:
68
- if self.verbose: print(e)
72
+ if self.verbose:
73
+ UserMessage(str(e))
69
74
  pass
70
75
  return res
71
76
 
72
- def forward(self, sym: Symbol, **kwargs) -> Symbol:
77
+ def forward(self, sym: Symbol, **_kwargs) -> Symbol:
73
78
  res = 'source,target,value\n'
74
79
  sym_list = self.formatter(sym).value
75
80
  if self.n_workers == 1:
@@ -82,4 +87,4 @@ class Graph(Expression):
82
87
  res += r
83
88
  return res
84
89
 
85
-
90
+ _DEFAULT_SENTENCE_FORMATTER = SentenceFormatter()
symai/extended/html_style_template.py CHANGED
@@ -76,13 +76,11 @@ class HtmlStyleTemplate(Expression):
76
76
  These strings are combined into a single `Symbol` object which is then clustered.
77
77
  Finally, the `render` method applies the `html_template` to the clustered `Symbol` and returns the result.
78
78
  """
79
- if type(sym) != Symbol:
79
+ if not isinstance(sym, Symbol):
80
80
  sym = Symbol(sym)
81
81
  html_data = list(self.html_stream(sym, **kwargs))
82
82
  style_data = [str(self.style_template(html,
83
83
  template=HTML_TEMPLATE_STYLE,
84
84
  placeholder='{{placeholder}}',
85
85
  **kwargs)) for html in html_data]
86
- res = '\n'.join(style_data)
87
- res = Symbol(res)
88
- return res
86
+ return Symbol('\n'.join(style_data))