symbolicai 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. symai/__init__.py +198 -134
  2. symai/backend/base.py +51 -51
  3. symai/backend/engines/drawing/engine_bfl.py +33 -33
  4. symai/backend/engines/drawing/engine_gpt_image.py +4 -10
  5. symai/backend/engines/embedding/engine_llama_cpp.py +50 -35
  6. symai/backend/engines/embedding/engine_openai.py +22 -16
  7. symai/backend/engines/execute/engine_python.py +16 -16
  8. symai/backend/engines/files/engine_io.py +51 -49
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +27 -23
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +53 -46
  11. symai/backend/engines/index/engine_pinecone.py +116 -88
  12. symai/backend/engines/index/engine_qdrant.py +1011 -0
  13. symai/backend/engines/index/engine_vectordb.py +78 -52
  14. symai/backend/engines/lean/engine_lean4.py +65 -25
  15. symai/backend/engines/neurosymbolic/__init__.py +28 -28
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +137 -135
  17. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +145 -152
  18. symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
  19. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +75 -49
  20. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +199 -155
  21. symai/backend/engines/neurosymbolic/engine_groq.py +106 -72
  22. symai/backend/engines/neurosymbolic/engine_huggingface.py +100 -67
  23. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +121 -93
  24. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +213 -132
  25. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +180 -137
  26. symai/backend/engines/ocr/engine_apilayer.py +18 -20
  27. symai/backend/engines/output/engine_stdout.py +9 -9
  28. symai/backend/engines/{webscraping → scrape}/engine_requests.py +25 -11
  29. symai/backend/engines/search/engine_openai.py +95 -83
  30. symai/backend/engines/search/engine_parallel.py +665 -0
  31. symai/backend/engines/search/engine_perplexity.py +40 -41
  32. symai/backend/engines/search/engine_serpapi.py +33 -28
  33. symai/backend/engines/speech_to_text/engine_local_whisper.py +37 -27
  34. symai/backend/engines/symbolic/engine_wolframalpha.py +14 -8
  35. symai/backend/engines/text_to_speech/engine_openai.py +15 -19
  36. symai/backend/engines/text_vision/engine_clip.py +34 -28
  37. symai/backend/engines/userinput/engine_console.py +3 -4
  38. symai/backend/mixin/anthropic.py +48 -40
  39. symai/backend/mixin/deepseek.py +4 -5
  40. symai/backend/mixin/google.py +5 -4
  41. symai/backend/mixin/groq.py +2 -4
  42. symai/backend/mixin/openai.py +132 -110
  43. symai/backend/settings.py +14 -14
  44. symai/chat.py +164 -94
  45. symai/collect/dynamic.py +13 -11
  46. symai/collect/pipeline.py +39 -31
  47. symai/collect/stats.py +109 -69
  48. symai/components.py +556 -238
  49. symai/constraints.py +14 -5
  50. symai/core.py +1495 -1210
  51. symai/core_ext.py +55 -50
  52. symai/endpoints/api.py +113 -58
  53. symai/extended/api_builder.py +22 -17
  54. symai/extended/arxiv_pdf_parser.py +13 -5
  55. symai/extended/bibtex_parser.py +8 -4
  56. symai/extended/conversation.py +88 -69
  57. symai/extended/document.py +40 -27
  58. symai/extended/file_merger.py +45 -7
  59. symai/extended/graph.py +38 -24
  60. symai/extended/html_style_template.py +17 -11
  61. symai/extended/interfaces/blip_2.py +1 -1
  62. symai/extended/interfaces/clip.py +4 -2
  63. symai/extended/interfaces/console.py +5 -3
  64. symai/extended/interfaces/dall_e.py +3 -1
  65. symai/extended/interfaces/file.py +2 -0
  66. symai/extended/interfaces/flux.py +3 -1
  67. symai/extended/interfaces/gpt_image.py +15 -6
  68. symai/extended/interfaces/input.py +2 -1
  69. symai/extended/interfaces/llava.py +1 -1
  70. symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +3 -2
  71. symai/extended/interfaces/naive_vectordb.py +2 -2
  72. symai/extended/interfaces/ocr.py +4 -2
  73. symai/extended/interfaces/openai_search.py +2 -0
  74. symai/extended/interfaces/parallel.py +30 -0
  75. symai/extended/interfaces/perplexity.py +2 -0
  76. symai/extended/interfaces/pinecone.py +6 -4
  77. symai/extended/interfaces/python.py +2 -0
  78. symai/extended/interfaces/serpapi.py +2 -0
  79. symai/extended/interfaces/terminal.py +0 -1
  80. symai/extended/interfaces/tts.py +2 -1
  81. symai/extended/interfaces/whisper.py +2 -1
  82. symai/extended/interfaces/wolframalpha.py +1 -0
  83. symai/extended/metrics/__init__.py +1 -1
  84. symai/extended/metrics/similarity.py +5 -2
  85. symai/extended/os_command.py +31 -22
  86. symai/extended/packages/symdev.py +39 -34
  87. symai/extended/packages/sympkg.py +30 -27
  88. symai/extended/packages/symrun.py +46 -35
  89. symai/extended/repo_cloner.py +10 -9
  90. symai/extended/seo_query_optimizer.py +15 -12
  91. symai/extended/solver.py +104 -76
  92. symai/extended/summarizer.py +8 -7
  93. symai/extended/taypan_interpreter.py +10 -9
  94. symai/extended/vectordb.py +28 -15
  95. symai/formatter/formatter.py +39 -31
  96. symai/formatter/regex.py +46 -44
  97. symai/functional.py +184 -86
  98. symai/imports.py +85 -51
  99. symai/interfaces.py +1 -1
  100. symai/memory.py +33 -24
  101. symai/menu/screen.py +28 -19
  102. symai/misc/console.py +27 -27
  103. symai/misc/loader.py +4 -3
  104. symai/models/base.py +147 -76
  105. symai/models/errors.py +1 -1
  106. symai/ops/__init__.py +1 -1
  107. symai/ops/measures.py +17 -14
  108. symai/ops/primitives.py +933 -635
  109. symai/post_processors.py +28 -24
  110. symai/pre_processors.py +58 -52
  111. symai/processor.py +15 -9
  112. symai/prompts.py +714 -649
  113. symai/server/huggingface_server.py +115 -32
  114. symai/server/llama_cpp_server.py +14 -6
  115. symai/server/qdrant_server.py +206 -0
  116. symai/shell.py +98 -39
  117. symai/shellsv.py +307 -223
  118. symai/strategy.py +135 -81
  119. symai/symbol.py +276 -225
  120. symai/utils.py +62 -46
  121. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +19 -9
  122. symbolicai-1.1.0.dist-info/RECORD +168 -0
  123. symbolicai-1.0.0.dist-info/RECORD +0 -163
  124. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
  125. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
  126. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
  127. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
@@ -64,7 +64,7 @@ res = run(value) # [MANAGED] must contain this line, do not change
64
64
 
65
65
  class APIBuilderPreProcessor(PreProcessor):
66
66
  def __call__(self, argument):
67
- return f'$> {argument.args[0]!s} =>'
67
+ return f"$> {argument.args[0]!s} =>"
68
68
 
69
69
 
70
70
  class APIBuilder(Expression):
@@ -77,9 +77,12 @@ class APIBuilder(Expression):
77
77
  self.sym_return_type = APIBuilder
78
78
 
79
79
  def forward(self, sym: Symbol, **kwargs) -> Symbol:
80
- @core.zero_shot(prompt="Build the API call code:\n",
81
- pre_processors=[APIBuilderPreProcessor()],
82
- post_processors=[CodeExtractPostProcessor()], **kwargs)
80
+ @core.zero_shot(
81
+ prompt="Build the API call code:\n",
82
+ pre_processors=[APIBuilderPreProcessor()],
83
+ post_processors=[CodeExtractPostProcessor()],
84
+ **kwargs,
85
+ )
83
86
  def _func(_, text) -> str:
84
87
  pass
85
88
 
@@ -96,18 +99,20 @@ class StackTraceRetryExecutor(Expression):
96
99
  def forward(self, code: Symbol, request: Symbol, **kwargs) -> Symbol:
97
100
  code = str(code)
98
101
  # Set value that gets passed on to the 'run' function in the generated code
99
- value = request.value # do not remove this line
102
+ value = request.value # do not remove this line
100
103
  # Create the 'run' function
101
104
  self._runnable = self.executor(code, locals=locals().copy(), globals=globals().copy())
102
- result = self._runnable['locals']['run'](value)
105
+ result = self._runnable["locals"]["run"](value)
103
106
  retry = 0
104
107
  # Retry if there is a 'Traceback' in the result
105
- while 'Traceback' in result and retry <= self.max_retries:
106
- self._runnable = self.executor(code, payload=result, locals=locals().copy(), globals=globals().copy(), **kwargs)
107
- result = self._runnable['locals']['run'](value)
108
+ while "Traceback" in result and retry <= self.max_retries:
109
+ self._runnable = self.executor(
110
+ code, payload=result, locals=locals().copy(), globals=globals().copy(), **kwargs
111
+ )
112
+ result = self._runnable["locals"]["run"](value)
108
113
  retry += 1
109
- if 'locals_res' in self._runnable:
110
- result = self._runnable['locals_res']
114
+ if "locals_res" in self._runnable:
115
+ result = self._runnable["locals_res"]
111
116
  return result
112
117
 
113
118
 
@@ -129,14 +134,14 @@ class APIExecutor(Expression):
129
134
  def forward(self, request: Symbol, **_kwargs) -> Symbol:
130
135
  self._request = self._to_symbol(request)
131
136
  if self._verbose:
132
- UserMessage(f'[REQUEST] {self._request}')
137
+ UserMessage(f"[REQUEST] {self._request}")
133
138
  # Generate the code to implement the API call
134
- self._code = self.builder(self._request)
139
+ self._code = self.builder(self._request)
135
140
  if self._verbose:
136
- UserMessage(f'[GENERATED_CODE] {self._code}')
141
+ UserMessage(f"[GENERATED_CODE] {self._code}")
137
142
  # Execute the code to define the 'run' function
138
- self._result = self.executor(self._code, request=self._request)
143
+ self._result = self.executor(self._code, request=self._request)
139
144
  if self._verbose:
140
- UserMessage(f'[RESULT]: {self._result}')
141
- self._value = self._result
145
+ UserMessage(f"[RESULT]: {self._result}")
146
+ self._value = self._result
142
147
  return self
@@ -12,7 +12,9 @@ from .file_merger import FileMerger
12
12
 
13
13
 
14
14
  class ArxivPdfParser(Expression):
15
- def __init__(self, url_pattern: str = r'https://arxiv.org/(?:pdf|abs)/(\d+.\d+)(?:\.pdf)?', **kwargs):
15
+ def __init__(
16
+ self, url_pattern: str = r"https://arxiv.org/(?:pdf|abs)/(\d+.\d+)(?:\.pdf)?", **kwargs
17
+ ):
16
18
  super().__init__(**kwargs)
17
19
  self.url_pattern = url_pattern
18
20
  self.merger = FileMerger()
@@ -22,7 +24,11 @@ class ArxivPdfParser(Expression):
22
24
  urls = re.findall(self.url_pattern, str(data))
23
25
 
24
26
  # Convert all urls to pdf urls
25
- pdf_urls = ["https://arxiv.org/pdf/" + (f"{url.split('/')[-1]}.pdf" if 'pdf' not in url else {url.split('/')[-1]}) for url in urls]
27
+ pdf_urls = [
28
+ "https://arxiv.org/pdf/"
29
+ + (f"{url.split('/')[-1]}.pdf" if "pdf" not in url else {url.split("/")[-1]})
30
+ for url in urls
31
+ ]
26
32
 
27
33
  # Create temporary folder in the home directory
28
34
  output_path = HOME_PATH / "temp" / "downloads"
@@ -31,7 +37,9 @@ class ArxivPdfParser(Expression):
31
37
  pdf_files = []
32
38
  with ThreadPoolExecutor() as executor:
33
39
  # Download all pdfs in parallel
34
- future_to_url = {executor.submit(self.download_pdf, url, output_path): url for url in pdf_urls}
40
+ future_to_url = {
41
+ executor.submit(self.download_pdf, url, output_path): url for url in pdf_urls
42
+ }
35
43
  for future in as_completed(future_to_url):
36
44
  url = future_to_url[future]
37
45
  try:
@@ -56,7 +64,7 @@ class ArxivPdfParser(Expression):
56
64
  def download_pdf(self, url, output_path):
57
65
  # Download pdfs
58
66
  response = requests.get(url)
59
- file_path = Path(output_path) / f'{url.split("/")[-1]}'
60
- with file_path.open('wb') as f:
67
+ file_path = Path(output_path) / f"{url.split('/')[-1]}"
68
+ with file_path.open("wb") as f:
61
69
  f.write(response.content)
62
70
  return str(file_path)
@@ -68,7 +68,7 @@ Multimodal Few-Shot Learning with Frozen Language Models Maria Tsimpoukelli
68
68
 
69
69
  class BibTexPreProcessor(PreProcessor):
70
70
  def __call__(self, argument):
71
- return f'>>>\n{argument.args[0]!s}\n\n<<<\n'
71
+ return f">>>\n{argument.args[0]!s}\n\n<<<\n"
72
72
 
73
73
 
74
74
  class BibTexParser(Expression):
@@ -81,9 +81,13 @@ class BibTexParser(Expression):
81
81
  self.sym_return_type = BibTexParser
82
82
 
83
83
  def forward(self, sym: Symbol, **kwargs) -> Symbol:
84
- @core.zero_shot(prompt="Create bibtex entries:\n",
85
- pre_processors=[BibTexPreProcessor()],
86
- post_processors=[CodeExtractPostProcessor()], **kwargs)
84
+ @core.zero_shot(
85
+ prompt="Create bibtex entries:\n",
86
+ pre_processors=[BibTexPreProcessor()],
87
+ post_processors=[CodeExtractPostProcessor()],
88
+ **kwargs,
89
+ )
87
90
  def _func(_, text) -> str:
88
91
  pass
92
+
89
93
  return _func(self, sym)
@@ -19,22 +19,25 @@ _DEFAULT_TEXT_CONTAINER_FORMATTER = TextContainerFormatter(text_split=4)
19
19
  class CodeFormatter:
20
20
  def __call__(self, value: str, *_args: Any, **_kwds: Any) -> Any:
21
21
  # extract code from chat conversations or ```<language>\n{code}\n``` blocks
22
- return Symbol(value).extract('Only extract code without ``` block markers or chat conversations')
22
+ return Symbol(value).extract(
23
+ "Only extract code without ``` block markers or chat conversations"
24
+ )
23
25
 
24
26
 
25
27
  class Conversation(SlidingWindowStringConcatMemory):
26
28
  def __init__(
27
- self,
28
- init: str | None = None,
29
- file_link: list[str] | None = None,
30
- url_link: list[str] | None = None,
31
- index_name: str | None = None,
32
- auto_print: bool = True,
33
- truncation_percentage: float = 0.8,
34
- truncation_type: str = 'head',
35
- with_metadata: bool = False,
36
- *args, **kwargs
37
- ):
29
+ self,
30
+ init: str | None = None,
31
+ file_link: list[str] | None = None,
32
+ url_link: list[str] | None = None,
33
+ index_name: str | None = None,
34
+ auto_print: bool = True,
35
+ truncation_percentage: float = 0.8,
36
+ truncation_type: str = "head",
37
+ with_metadata: bool = False,
38
+ *args,
39
+ **kwargs,
40
+ ):
38
41
  super().__init__(*args, **kwargs)
39
42
  self.truncation_percentage = truncation_percentage
40
43
  self.truncation_type = truncation_type
@@ -48,9 +51,9 @@ class Conversation(SlidingWindowStringConcatMemory):
48
51
  self.index_name = index_name
49
52
  self.seo_opt = SEOQueryOptimizer()
50
53
  self.reader = FileReader(with_metadata=with_metadata)
51
- self.scraper = Interface('naive_webscraping')
52
- self.user_tag = 'USER::'
53
- self.bot_tag = 'ASSISTANT::'
54
+ self.scraper = Interface("naive_scrape")
55
+ self.user_tag = "USER::"
56
+ self.bot_tag = "ASSISTANT::"
54
57
 
55
58
  if init is not None:
56
59
  self.store_system_message(init, *args, **kwargs)
@@ -63,14 +66,16 @@ class Conversation(SlidingWindowStringConcatMemory):
63
66
  self.indexer = None
64
67
  self.index = None
65
68
  if index_name is not None:
66
- UserMessage("Index not supported for conversation class.", raise_with=NotImplementedError)
69
+ UserMessage(
70
+ "Index not supported for conversation class.", raise_with=NotImplementedError
71
+ )
67
72
 
68
73
  def __getstate__(self):
69
74
  state = super().__getstate__().copy()
70
- state.pop('seo_opt', None)
71
- state.pop('indexer', None)
72
- state.pop('index', None)
73
- state.pop('reader', None)
75
+ state.pop("seo_opt", None)
76
+ state.pop("indexer", None)
77
+ state.pop("index", None)
78
+ state.pop("reader", None)
74
79
  return state
75
80
 
76
81
  def __setstate__(self, state):
@@ -78,7 +83,9 @@ class Conversation(SlidingWindowStringConcatMemory):
78
83
  self.seo_opt = SEOQueryOptimizer()
79
84
  self.reader = FileReader()
80
85
  if self.index_name is not None:
81
- UserMessage("Index not supported for conversation class.", raise_with=NotImplementedError)
86
+ UserMessage(
87
+ "Index not supported for conversation class.", raise_with=NotImplementedError
88
+ )
82
89
 
83
90
  def store_system_message(self, message: str, *_args, **_kwargs):
84
91
  val = f"[SYSTEM_INSTRUCTION::]: <<<\n{message!s}\n>>>\n"
@@ -100,7 +107,7 @@ class Conversation(SlidingWindowStringConcatMemory):
100
107
  path_obj = Path(file_path)
101
108
  path_obj.parent.mkdir(parents=True, exist_ok=True)
102
109
  # Save the conversation object as a pickle file
103
- with path_obj.open('wb') as handle:
110
+ with path_obj.open("wb") as handle:
104
111
  pickle.dump(conversation, handle, protocol=pickle.HIGHEST_PROTOCOL)
105
112
 
106
113
  def load_conversation_state(self, path: str) -> "Conversation":
@@ -110,7 +117,7 @@ class Conversation(SlidingWindowStringConcatMemory):
110
117
  if path_obj.stat().st_size <= 0:
111
118
  UserMessage("File is empty.", raise_with=Exception)
112
119
  # Load the conversation object from a pickle file
113
- with path_obj.open('rb') as handle:
120
+ with path_obj.open("rb") as handle:
114
121
  conversation_state = pickle.load(handle)
115
122
  else:
116
123
  UserMessage("File does not exist or is empty.", raise_with=Exception)
@@ -123,14 +130,16 @@ class Conversation(SlidingWindowStringConcatMemory):
123
130
  self._memory = conversation_state._memory
124
131
  self.truncation_percentage = conversation_state.truncation_percentage
125
132
  self.truncation_type = conversation_state.truncation_type
126
- self.auto_print = conversation_state.auto_print
133
+ self.auto_print = conversation_state.auto_print
127
134
  self.file_link = conversation_state.file_link
128
135
  self.url_link = conversation_state.url_link
129
- self.index_name = conversation_state.index_name
136
+ self.index_name = conversation_state.index_name
130
137
  self.seo_opt = SEOQueryOptimizer()
131
138
  self.reader = FileReader()
132
139
  if self.index_name is not None:
133
- UserMessage("Index not supported for conversation class.", raise_with=NotImplementedError)
140
+ UserMessage(
141
+ "Index not supported for conversation class.", raise_with=NotImplementedError
142
+ )
134
143
  return self
135
144
 
136
145
  def commit(self, target_file: str | None = None, formatter: Callable | None = None):
@@ -143,20 +152,22 @@ class Conversation(SlidingWindowStringConcatMemory):
143
152
  elif isinstance(file_link, list) and len(file_link) == 1:
144
153
  file_link = file_link[0]
145
154
  else:
146
- file_link = None # cannot commit to multiple files
147
- UserMessage('Cannot commit to multiple files.', raise_with=Exception)
155
+ file_link = None # cannot commit to multiple files
156
+ UserMessage("Cannot commit to multiple files.", raise_with=Exception)
148
157
  if file_link:
149
158
  # if file extension is .py, then format code
150
159
  format_ = formatter
151
- formatter = CodeFormatter() if format_ is None and file_link.endswith('.py') else formatter
160
+ formatter = (
161
+ CodeFormatter() if format_ is None and file_link.endswith(".py") else formatter
162
+ )
152
163
  val = self.value
153
164
  if formatter:
154
165
  val = formatter(val)
155
166
  # if file does not exist, create it
156
- with Path(file_link).open('w') as file:
167
+ with Path(file_link).open("w") as file:
157
168
  file.write(str(val))
158
169
  else:
159
- UserMessage('File link is not set or a set of files.', raise_with=Exception)
170
+ UserMessage("File link is not set or a set of files.", raise_with=Exception)
160
171
 
161
172
  def save(self, path: str, replace: bool = False) -> Symbol:
162
173
  return Symbol(self._memory).save(path, replace=replace)
@@ -174,7 +185,7 @@ class Conversation(SlidingWindowStringConcatMemory):
174
185
  res = self.recall(query, *args, payload=payload, **kwargs)
175
186
 
176
187
  # if user is requesting to preview the response, then return only the preview result
177
- if kwargs.get('preview'):
188
+ if kwargs.get("preview"):
178
189
  if self.auto_print:
179
190
  UserMessage(str(res), style="text")
180
191
  return res
@@ -190,12 +201,14 @@ class Conversation(SlidingWindowStringConcatMemory):
190
201
  return res
191
202
 
192
203
  def _apply_truncation_overrides(self, kwargs: dict[str, Any]) -> dict[str, Any]:
193
- dynamic_truncation_percentage = kwargs.get('truncation_percentage', self.truncation_percentage)
194
- dynamic_truncation_type = kwargs.get('truncation_type', self.truncation_type)
204
+ dynamic_truncation_percentage = kwargs.get(
205
+ "truncation_percentage", self.truncation_percentage
206
+ )
207
+ dynamic_truncation_type = kwargs.get("truncation_type", self.truncation_type)
195
208
  return {
196
209
  **kwargs,
197
- 'truncation_percentage': dynamic_truncation_percentage,
198
- 'truncation_type': dynamic_truncation_type,
210
+ "truncation_percentage": dynamic_truncation_percentage,
211
+ "truncation_type": dynamic_truncation_type,
199
212
  }
200
213
 
201
214
  def _retrieve_index_memory(self, query: Symbol, args: tuple[Any, ...], kwargs: dict[str, Any]):
@@ -205,7 +218,7 @@ class Conversation(SlidingWindowStringConcatMemory):
205
218
  memory_split = self._memory.split(self.marker)
206
219
  memory_shards = []
207
220
  for shard in memory_split:
208
- if shard.strip() == '':
221
+ if shard.strip() == "":
209
222
  continue
210
223
  memory_shards.append(shard)
211
224
 
@@ -213,33 +226,33 @@ class Conversation(SlidingWindowStringConcatMemory):
213
226
  if length_memory_shards > 5:
214
227
  memory_shards = memory_shards[:2] + memory_shards[-3:]
215
228
  elif length_memory_shards > 3:
216
- retained = memory_shards[-(length_memory_shards - 2):]
229
+ retained = memory_shards[-(length_memory_shards - 2) :]
217
230
  memory_shards = memory_shards[:2] + retained
218
231
 
219
- search_query = query | '\n' | '\n'.join(memory_shards)
220
- if kwargs.get('use_seo_opt'):
221
- search_query = self.seo_opt('[Query]:' | search_query)
232
+ search_query = query | "\n" | "\n".join(memory_shards)
233
+ if kwargs.get("use_seo_opt"):
234
+ search_query = self.seo_opt("[Query]:" | search_query)
222
235
  memory = self.index(search_query, *args, **kwargs)
223
236
 
224
- if 'raw_result' in kwargs:
237
+ if "raw_result" in kwargs:
225
238
  UserMessage(str(memory), style="text")
226
239
  return memory
227
240
 
228
241
  def _build_payload(self, kwargs: dict[str, Any], memory) -> str:
229
- payload = ''
230
- if 'payload' in kwargs:
242
+ payload = ""
243
+ if "payload" in kwargs:
231
244
  payload = f"[Conversation Payload]:\n{kwargs.pop('payload')}\n"
232
245
 
233
- index_memory = ''
246
+ index_memory = ""
234
247
  if memory:
235
- index_memory = f'[Index Retrieval]:\n{str(memory)[:1500]}\n'
236
- return f'{index_memory}{payload}'
248
+ index_memory = f"[Index Retrieval]:\n{str(memory)[:1500]}\n"
249
+ return f"{index_memory}{payload}"
237
250
 
238
251
  def _append_interaction_to_memory(self, query: Symbol, res: Symbol) -> None:
239
252
  prompt = self.build_tag(self.user_tag, query)
240
253
  self.store(prompt)
241
254
 
242
- self._value = res.value # save last response
255
+ self._value = res.value # save last response
243
256
  val = self.build_tag(self.bot_tag, res)
244
257
  self.store(val)
245
258
 
@@ -276,27 +289,33 @@ Responses should be:
276
289
  - Referenced to source when applicable
277
290
  """
278
291
 
292
+
279
293
  @deprecated("Use `Conversation` instead for now. This will be removed/fixed in the future.")
280
294
  class RetrievalAugmentedConversation(Conversation):
281
295
  def __init__(
282
- self,
283
- folder_path: str | None = None,
284
- *,
285
- index_name: str | None = None,
286
- max_depth: int | None = 0,
287
- auto_print: bool = True,
288
- top_k: int = 5,
289
- formatter: Callable = _DEFAULT_TEXT_CONTAINER_FORMATTER,
290
- overwrite: bool = False,
291
- truncation_percentage: float = 0.8,
292
- truncation_type: str = 'head',
293
- with_metadata: bool = False,
294
- raw_result: bool | None = False,
295
- new_dim: int | None = None,
296
- **kwargs
297
- ):
298
-
299
- super().__init__(auto_print=auto_print, truncation_percentage=truncation_percentage, truncation_type=truncation_type, with_metadata=with_metadata, **kwargs)
296
+ self,
297
+ folder_path: str | None = None,
298
+ *,
299
+ index_name: str | None = None,
300
+ max_depth: int | None = 0,
301
+ auto_print: bool = True,
302
+ top_k: int = 5,
303
+ formatter: Callable = _DEFAULT_TEXT_CONTAINER_FORMATTER,
304
+ overwrite: bool = False,
305
+ truncation_percentage: float = 0.8,
306
+ truncation_type: str = "head",
307
+ with_metadata: bool = False,
308
+ raw_result: bool | None = False,
309
+ new_dim: int | None = None,
310
+ **kwargs,
311
+ ):
312
+ super().__init__(
313
+ auto_print=auto_print,
314
+ truncation_percentage=truncation_percentage,
315
+ truncation_type=truncation_type,
316
+ with_metadata=with_metadata,
317
+ **kwargs,
318
+ )
300
319
 
301
320
  self.retriever = DocumentRetriever(
302
321
  source=folder_path,
@@ -308,7 +327,7 @@ class RetrievalAugmentedConversation(Conversation):
308
327
  with_metadata=with_metadata,
309
328
  raw_result=raw_result,
310
329
  new_dim=new_dim,
311
- **kwargs
330
+ **kwargs,
312
331
  )
313
332
 
314
333
  self.index = self.retriever.index
@@ -333,14 +352,14 @@ class RetrievalAugmentedConversation(Conversation):
333
352
 
334
353
  memory = self.index(query, *args, **kwargs)
335
354
 
336
- if 'raw_result' in kwargs:
355
+ if "raw_result" in kwargs:
337
356
  UserMessage(str(memory), style="text")
338
357
  return memory
339
358
 
340
359
  prompt = self.build_tag(self.user_tag, query)
341
360
  self.store(prompt)
342
361
 
343
- payload = f'[Index Retrieval]:\n{str(memory)[:1500]}\n'
362
+ payload = f"[Index Retrieval]:\n{str(memory)[:1500]}\n"
344
363
 
345
364
  res = self.recall(query, *args, payload=payload, **kwargs)
346
365
 
@@ -15,22 +15,24 @@ _DEFAULT_PARAGRAPH_FORMATTER = ParagraphFormatter()
15
15
 
16
16
  class DocumentRetriever(Expression):
17
17
  def __init__(
18
- self,
19
- source: str | None = None,
20
- *,
21
- index_name: str = Indexer.DEFAULT,
22
- top_k: int = 5,
23
- max_depth: int = 1,
24
- formatter: Callable = _DEFAULT_PARAGRAPH_FORMATTER,
25
- overwrite: bool = False,
26
- with_metadata: bool = False,
27
- raw_result: bool | None = False,
28
- new_dim: int | None = None,
29
- **kwargs
30
- ):
18
+ self,
19
+ source: str | None = None,
20
+ *,
21
+ index_name: str = Indexer.DEFAULT,
22
+ top_k: int = 5,
23
+ max_depth: int = 1,
24
+ formatter: Callable = _DEFAULT_PARAGRAPH_FORMATTER,
25
+ overwrite: bool = False,
26
+ with_metadata: bool = False,
27
+ raw_result: bool | None = False,
28
+ new_dim: int | None = None,
29
+ **kwargs,
30
+ ):
31
31
  super().__init__(**kwargs)
32
- self.indexer = Indexer(index_name=index_name, top_k=top_k, formatter=formatter, auto_add=False, new_dim=new_dim)
33
- self.reader = FileReader(with_metadata=with_metadata)
32
+ self.indexer = Indexer(
33
+ index_name=index_name, top_k=top_k, formatter=formatter, auto_add=False, new_dim=new_dim
34
+ )
35
+ self.reader = FileReader(with_metadata=with_metadata)
34
36
  self.new_dim = new_dim
35
37
 
36
38
  if overwrite:
@@ -39,21 +41,23 @@ class DocumentRetriever(Expression):
39
41
  # we insert the text into the index if (1) index does not exist and (2) there's a specific source
40
42
  if source is not None and not self.indexer.exists():
41
43
  self.indexer.register()
42
- text = self.parse_source(source, with_metadata=with_metadata, max_depth=max_depth, **kwargs)
44
+ text = self.parse_source(
45
+ source, with_metadata=with_metadata, max_depth=max_depth, **kwargs
46
+ )
43
47
  self.index = self.indexer(data=text, raw_result=raw_result, **kwargs)
44
48
  else:
45
49
  # we don't insert the text at initialization since the index already exists and there's no specific source
46
50
  self.index = self.indexer(raw_result=raw_result, **kwargs)
47
51
 
48
52
  def forward(
49
- self,
50
- query: Symbol,
51
- raw_result: bool | None = False,
52
- ) -> Symbol:
53
+ self,
54
+ query: Symbol,
55
+ raw_result: bool | None = False,
56
+ ) -> Symbol:
53
57
  return self.index(
54
- query,
55
- raw_result=raw_result,
56
- )
58
+ query,
59
+ raw_result=raw_result,
60
+ )
57
61
 
58
62
  def insert(self, source: str | Path, **kwargs):
59
63
  # dynamically insert data into the index given a session
@@ -61,12 +65,19 @@ class DocumentRetriever(Expression):
61
65
  # - a string (e.g. something that the user wants to insert)
62
66
  # - a file path (e.g. a new file that the user wants to insert)
63
67
  # - a directory path (e.g. a new directory that the user wants to insert)
64
- text = self.parse_source(source, with_metadata=kwargs.get('with_metadata', False), max_depth=kwargs.get('max_depth', 1), **kwargs)
65
- #NOTE: Do we need `new_dim` here?
68
+ text = self.parse_source(
69
+ source,
70
+ with_metadata=kwargs.get("with_metadata", False),
71
+ max_depth=kwargs.get("max_depth", 1),
72
+ **kwargs,
73
+ )
74
+ # NOTE: Do we need `new_dim` here?
66
75
  self.add(text, index_name=self.indexer.index_name, **kwargs)
67
76
  self.config(None, save=True, index_name=self.indexer.index_name, **kwargs)
68
77
 
69
- def parse_source(self, source: str, with_metadata: bool, max_depth: int, **kwargs) -> list[Union[str, 'TextContainer']]:
78
+ def parse_source(
79
+ self, source: str, with_metadata: bool, max_depth: int, **kwargs
80
+ ) -> list[Union[str, "TextContainer"]]:
70
81
  maybe_path = Path(source)
71
82
  if isinstance(source, str) and not (maybe_path.is_file() or maybe_path.is_dir()):
72
83
  return Symbol(source).zip(new_dim=self.new_dim)
@@ -75,5 +86,7 @@ class DocumentRetriever(Expression):
75
86
  return self.reader(files, with_metadata=with_metadata, **kwargs)
76
87
  if maybe_path.is_file():
77
88
  return self.reader(source, with_metadata=with_metadata, **kwargs)
78
- UserMessage(f"Invalid source: {source}; must be a file, directory, or string", raise_with=ValueError)
89
+ UserMessage(
90
+ f"Invalid source: {source}; must be a file, directory, or string", raise_with=ValueError
91
+ )
79
92
  return []
@@ -12,12 +12,48 @@ class FileMerger(Expression):
12
12
  Class to merge contents of multiple files into one, specified by their file endings and root path.
13
13
  Files specified in the exclude list will not be included.
14
14
  """
15
- def __init__(self, file_endings: list[str] | None = None,
16
- file_excludes: list[str] | None = None, **kwargs):
15
+
16
+ def __init__(
17
+ self,
18
+ file_endings: list[str] | None = None,
19
+ file_excludes: list[str] | None = None,
20
+ **kwargs,
21
+ ):
17
22
  if file_excludes is None:
18
- file_excludes = ['__init__.py', '__pycache__', 'LICENSE', 'requirements.txt', 'environment.yaml', '.git']
23
+ file_excludes = [
24
+ "__init__.py",
25
+ "__pycache__",
26
+ "LICENSE",
27
+ "requirements.txt",
28
+ "environment.yaml",
29
+ ".git",
30
+ ]
19
31
  if file_endings is None:
20
- file_endings = ['.py', '.md', '.txt', '.sh', '.pdf', '.json', '.yaml', '.java', '.cpp', '.hpp', '.c', '.h', '.js', '.css', '.html', '.xml', '.csv', '.tsv', '.yml', '.rst', '.ipynb', '.tex', '.bib']
32
+ file_endings = [
33
+ ".py",
34
+ ".md",
35
+ ".txt",
36
+ ".sh",
37
+ ".pdf",
38
+ ".json",
39
+ ".yaml",
40
+ ".java",
41
+ ".cpp",
42
+ ".hpp",
43
+ ".c",
44
+ ".h",
45
+ ".js",
46
+ ".css",
47
+ ".html",
48
+ ".xml",
49
+ ".csv",
50
+ ".tsv",
51
+ ".yml",
52
+ ".rst",
53
+ ".ipynb",
54
+ ".tex",
55
+ ".bib",
56
+ ]
21
57
  super().__init__(**kwargs)
22
58
  self.file_endings = file_endings
23
59
  self.file_excludes = file_excludes
@@ -56,9 +92,11 @@ class FileMerger(Expression):
56
92
  file_path_escaped = file_path_str.replace(" ", "\\ ")
57
93
 
58
94
  # Append start and end markers for each file
59
- file_content = f"# ----[FILE_START]<PART1/1>{file_path_escaped}[FILE_CONTENT]:\n" + \
60
- file_content + \
61
- f"\n# ----[FILE_END]{file_path_escaped}\n"
95
+ file_content = (
96
+ f"# ----[FILE_START]<PART1/1>{file_path_escaped}[FILE_CONTENT]:\n"
97
+ + file_content
98
+ + f"\n# ----[FILE_END]{file_path_escaped}\n"
99
+ )
62
100
 
63
101
  # Merge the file contents
64
102
  merged_file += file_content