symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +96 -64
- symai/backend/base.py +93 -80
- symai/backend/engines/drawing/engine_bfl.py +12 -11
- symai/backend/engines/drawing/engine_gpt_image.py +108 -87
- symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
- symai/backend/engines/embedding/engine_openai.py +3 -5
- symai/backend/engines/execute/engine_python.py +6 -5
- symai/backend/engines/files/engine_io.py +74 -67
- symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
- symai/backend/engines/index/engine_pinecone.py +23 -24
- symai/backend/engines/index/engine_vectordb.py +16 -14
- symai/backend/engines/lean/engine_lean4.py +38 -34
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
- symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
- symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
- symai/backend/engines/ocr/engine_apilayer.py +6 -8
- symai/backend/engines/output/engine_stdout.py +1 -4
- symai/backend/engines/search/engine_openai.py +7 -7
- symai/backend/engines/search/engine_perplexity.py +5 -5
- symai/backend/engines/search/engine_serpapi.py +12 -14
- symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
- symai/backend/engines/text_to_speech/engine_openai.py +5 -7
- symai/backend/engines/text_vision/engine_clip.py +7 -11
- symai/backend/engines/userinput/engine_console.py +3 -3
- symai/backend/engines/webscraping/engine_requests.py +81 -48
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +4 -2
- symai/backend/mixin/deepseek.py +2 -0
- symai/backend/mixin/google.py +2 -0
- symai/backend/mixin/openai.py +11 -3
- symai/backend/settings.py +83 -16
- symai/chat.py +101 -78
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +77 -69
- symai/collect/pipeline.py +35 -27
- symai/collect/stats.py +75 -63
- symai/components.py +198 -169
- symai/constraints.py +15 -12
- symai/core.py +698 -359
- symai/core_ext.py +32 -34
- symai/endpoints/api.py +80 -73
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +11 -8
- symai/extended/arxiv_pdf_parser.py +13 -12
- symai/extended/bibtex_parser.py +2 -3
- symai/extended/conversation.py +101 -90
- symai/extended/document.py +17 -10
- symai/extended/file_merger.py +18 -13
- symai/extended/graph.py +18 -13
- symai/extended/html_style_template.py +2 -4
- symai/extended/interfaces/blip_2.py +1 -2
- symai/extended/interfaces/clip.py +1 -2
- symai/extended/interfaces/console.py +7 -1
- symai/extended/interfaces/dall_e.py +1 -1
- symai/extended/interfaces/flux.py +1 -1
- symai/extended/interfaces/gpt_image.py +1 -1
- symai/extended/interfaces/input.py +1 -1
- symai/extended/interfaces/llava.py +0 -1
- symai/extended/interfaces/naive_vectordb.py +7 -8
- symai/extended/interfaces/naive_webscraping.py +1 -1
- symai/extended/interfaces/ocr.py +1 -1
- symai/extended/interfaces/pinecone.py +6 -5
- symai/extended/interfaces/serpapi.py +1 -1
- symai/extended/interfaces/terminal.py +2 -3
- symai/extended/interfaces/tts.py +1 -1
- symai/extended/interfaces/whisper.py +1 -1
- symai/extended/interfaces/wolframalpha.py +1 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +11 -13
- symai/extended/os_command.py +17 -16
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +19 -16
- symai/extended/packages/sympkg.py +12 -9
- symai/extended/packages/symrun.py +21 -19
- symai/extended/repo_cloner.py +11 -10
- symai/extended/seo_query_optimizer.py +1 -2
- symai/extended/solver.py +20 -23
- symai/extended/summarizer.py +4 -3
- symai/extended/taypan_interpreter.py +10 -12
- symai/extended/vectordb.py +99 -82
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +12 -16
- symai/formatter/regex.py +62 -63
- symai/functional.py +176 -122
- symai/imports.py +136 -127
- symai/interfaces.py +56 -27
- symai/memory.py +14 -13
- symai/misc/console.py +49 -39
- symai/misc/loader.py +5 -3
- symai/models/__init__.py +17 -1
- symai/models/base.py +269 -181
- symai/models/errors.py +0 -1
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +11 -15
- symai/ops/primitives.py +348 -228
- symai/post_processors.py +32 -28
- symai/pre_processors.py +39 -41
- symai/processor.py +6 -4
- symai/prompts.py +59 -45
- symai/server/huggingface_server.py +23 -20
- symai/server/llama_cpp_server.py +7 -5
- symai/shell.py +3 -4
- symai/shellsv.py +499 -375
- symai/strategy.py +517 -287
- symai/symbol.py +111 -116
- symai/utils.py +42 -36
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
- symbolicai-1.0.0.dist-info/RECORD +163 -0
- symbolicai-0.20.2.dist-info/RECORD +0 -162
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,14 @@
|
|
|
1
|
-
import os
|
|
2
1
|
import re
|
|
3
2
|
import shutil
|
|
4
|
-
import requests
|
|
5
|
-
|
|
6
3
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
7
|
|
|
8
|
+
from ..backend.settings import HOME_PATH
|
|
8
9
|
from ..symbol import Expression, Symbol
|
|
10
|
+
from ..utils import UserMessage
|
|
9
11
|
from .file_merger import FileMerger
|
|
10
|
-
from ..backend.settings import HOME_PATH
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
class ArxivPdfParser(Expression):
|
|
@@ -21,11 +22,11 @@ class ArxivPdfParser(Expression):
|
|
|
21
22
|
urls = re.findall(self.url_pattern, str(data))
|
|
22
23
|
|
|
23
24
|
# Convert all urls to pdf urls
|
|
24
|
-
pdf_urls = [
|
|
25
|
+
pdf_urls = ["https://arxiv.org/pdf/" + (f"{url.split('/')[-1]}.pdf" if 'pdf' not in url else {url.split('/')[-1]}) for url in urls]
|
|
25
26
|
|
|
26
27
|
# Create temporary folder in the home directory
|
|
27
|
-
output_path =
|
|
28
|
-
|
|
28
|
+
output_path = HOME_PATH / "temp" / "downloads"
|
|
29
|
+
output_path.mkdir(parents=True, exist_ok=True)
|
|
29
30
|
|
|
30
31
|
pdf_files = []
|
|
31
32
|
with ThreadPoolExecutor() as executor:
|
|
@@ -36,13 +37,13 @@ class ArxivPdfParser(Expression):
|
|
|
36
37
|
try:
|
|
37
38
|
pdf_files.append(future.result())
|
|
38
39
|
except Exception as exc:
|
|
39
|
-
|
|
40
|
+
UserMessage(f"{url!r} generated an exception: {exc}")
|
|
40
41
|
|
|
41
42
|
if len(pdf_files) == 0:
|
|
42
43
|
return None
|
|
43
44
|
|
|
44
45
|
# Merge all pdfs into one file
|
|
45
|
-
merged_file = self.merger(output_path, **kwargs)
|
|
46
|
+
merged_file = self.merger(str(output_path), **kwargs)
|
|
46
47
|
|
|
47
48
|
# Return the merged file as a Symbol
|
|
48
49
|
return_file = self._to_symbol(merged_file)
|
|
@@ -55,7 +56,7 @@ class ArxivPdfParser(Expression):
|
|
|
55
56
|
def download_pdf(self, url, output_path):
|
|
56
57
|
# Download pdfs
|
|
57
58
|
response = requests.get(url)
|
|
58
|
-
|
|
59
|
-
with open(
|
|
59
|
+
file_path = Path(output_path) / f'{url.split("/")[-1]}'
|
|
60
|
+
with file_path.open('wb') as f:
|
|
60
61
|
f.write(response.content)
|
|
61
|
-
return
|
|
62
|
+
return str(file_path)
|
symai/extended/bibtex_parser.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
from .. import core
|
|
2
|
+
from ..post_processors import CodeExtractPostProcessor
|
|
2
3
|
from ..pre_processors import PreProcessor
|
|
3
4
|
from ..symbol import Expression, Symbol
|
|
4
|
-
from ..post_processors import CodeExtractPostProcessor
|
|
5
|
-
|
|
6
5
|
|
|
7
6
|
BIB_DESCRIPTION = """[Description]
|
|
8
7
|
You take in a text with references to papers and return a list of biblatex entries.
|
|
@@ -69,7 +68,7 @@ Multimodal Few-Shot Learning with Frozen Language Models Maria Tsimpoukelli
|
|
|
69
68
|
|
|
70
69
|
class BibTexPreProcessor(PreProcessor):
|
|
71
70
|
def __call__(self, argument):
|
|
72
|
-
return '>>>\n{}\n\n<<<\n'
|
|
71
|
+
return f'>>>\n{argument.args[0]!s}\n\n<<<\n'
|
|
73
72
|
|
|
74
73
|
|
|
75
74
|
class BibTexParser(Expression):
|
symai/extended/conversation.py
CHANGED
|
@@ -1,21 +1,23 @@
|
|
|
1
|
-
import os
|
|
2
1
|
import pickle
|
|
2
|
+
from collections.abc import Callable
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import Any
|
|
6
6
|
|
|
7
|
-
from ..components import FileReader
|
|
7
|
+
from ..components import FileReader
|
|
8
8
|
from ..formatter import TextContainerFormatter
|
|
9
9
|
from ..interfaces import Interface
|
|
10
10
|
from ..memory import SlidingWindowStringConcatMemory
|
|
11
11
|
from ..symbol import Symbol
|
|
12
|
-
from ..utils import
|
|
12
|
+
from ..utils import UserMessage, deprecated
|
|
13
13
|
from .document import DocumentRetriever
|
|
14
14
|
from .seo_query_optimizer import SEOQueryOptimizer
|
|
15
15
|
|
|
16
|
+
_DEFAULT_TEXT_CONTAINER_FORMATTER = TextContainerFormatter(text_split=4)
|
|
17
|
+
|
|
16
18
|
|
|
17
19
|
class CodeFormatter:
|
|
18
|
-
def __call__(self, value: str, *
|
|
20
|
+
def __call__(self, value: str, *_args: Any, **_kwds: Any) -> Any:
|
|
19
21
|
# extract code from chat conversations or ```<language>\n{code}\n``` blocks
|
|
20
22
|
return Symbol(value).extract('Only extract code without ``` block markers or chat conversations')
|
|
21
23
|
|
|
@@ -23,10 +25,10 @@ class CodeFormatter:
|
|
|
23
25
|
class Conversation(SlidingWindowStringConcatMemory):
|
|
24
26
|
def __init__(
|
|
25
27
|
self,
|
|
26
|
-
init:
|
|
27
|
-
file_link:
|
|
28
|
-
url_link:
|
|
29
|
-
index_name:
|
|
28
|
+
init: str | None = None,
|
|
29
|
+
file_link: list[str] | None = None,
|
|
30
|
+
url_link: list[str] | None = None,
|
|
31
|
+
index_name: str | None = None,
|
|
30
32
|
auto_print: bool = True,
|
|
31
33
|
truncation_percentage: float = 0.8,
|
|
32
34
|
truncation_type: str = 'head',
|
|
@@ -61,7 +63,7 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
61
63
|
self.indexer = None
|
|
62
64
|
self.index = None
|
|
63
65
|
if index_name is not None:
|
|
64
|
-
|
|
66
|
+
UserMessage("Index not supported for conversation class.", raise_with=NotImplementedError)
|
|
65
67
|
|
|
66
68
|
def __getstate__(self):
|
|
67
69
|
state = super().__getstate__().copy()
|
|
@@ -76,41 +78,42 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
76
78
|
self.seo_opt = SEOQueryOptimizer()
|
|
77
79
|
self.reader = FileReader()
|
|
78
80
|
if self.index_name is not None:
|
|
79
|
-
|
|
81
|
+
UserMessage("Index not supported for conversation class.", raise_with=NotImplementedError)
|
|
80
82
|
|
|
81
|
-
def store_system_message(self, message: str, *
|
|
82
|
-
val = f"[SYSTEM_INSTRUCTION::]: <<<\n{
|
|
83
|
+
def store_system_message(self, message: str, *_args, **_kwargs):
|
|
84
|
+
val = f"[SYSTEM_INSTRUCTION::]: <<<\n{message!s}\n>>>\n"
|
|
83
85
|
self.store(val)
|
|
84
86
|
|
|
85
|
-
def store_file(self, file_path: str, *
|
|
87
|
+
def store_file(self, file_path: str, *_args, **_kwargs):
|
|
86
88
|
content = self.reader(file_path)
|
|
87
|
-
val = f"[DATA::{file_path}]: <<<\n{
|
|
89
|
+
val = f"[DATA::{file_path}]: <<<\n{content!s}\n>>>\n"
|
|
88
90
|
self.store(val)
|
|
89
91
|
|
|
90
|
-
def store_url(self, url: str, *
|
|
92
|
+
def store_url(self, url: str, *_args, **_kwargs):
|
|
91
93
|
content = self.scraper(url)
|
|
92
|
-
val = f"[DATA::{url}]: <<<\n{
|
|
94
|
+
val = f"[DATA::{url}]: <<<\n{content!s}\n>>>\n"
|
|
93
95
|
self.store(val)
|
|
94
96
|
|
|
95
97
|
@staticmethod
|
|
96
98
|
def save_conversation_state(conversation: "Conversation", file_path: str) -> None:
|
|
97
99
|
# Check if path exists and create it if it doesn't
|
|
98
|
-
|
|
99
|
-
|
|
100
|
+
path_obj = Path(file_path)
|
|
101
|
+
path_obj.parent.mkdir(parents=True, exist_ok=True)
|
|
100
102
|
# Save the conversation object as a pickle file
|
|
101
|
-
with open(
|
|
103
|
+
with path_obj.open('wb') as handle:
|
|
102
104
|
pickle.dump(conversation, handle, protocol=pickle.HIGHEST_PROTOCOL)
|
|
103
105
|
|
|
104
106
|
def load_conversation_state(self, path: str) -> "Conversation":
|
|
105
107
|
# Check if the file exists and it's not empty
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
108
|
+
path_obj = Path(path)
|
|
109
|
+
if path_obj.exists():
|
|
110
|
+
if path_obj.stat().st_size <= 0:
|
|
111
|
+
UserMessage("File is empty.", raise_with=Exception)
|
|
109
112
|
# Load the conversation object from a pickle file
|
|
110
|
-
with open(
|
|
113
|
+
with path_obj.open('rb') as handle:
|
|
111
114
|
conversation_state = pickle.load(handle)
|
|
112
115
|
else:
|
|
113
|
-
|
|
116
|
+
UserMessage("File does not exist or is empty.", raise_with=Exception)
|
|
114
117
|
|
|
115
118
|
# Create a new instance of the `Conversation` class and restore
|
|
116
119
|
# the state from the saved conversation
|
|
@@ -127,10 +130,10 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
127
130
|
self.seo_opt = SEOQueryOptimizer()
|
|
128
131
|
self.reader = FileReader()
|
|
129
132
|
if self.index_name is not None:
|
|
130
|
-
|
|
133
|
+
UserMessage("Index not supported for conversation class.", raise_with=NotImplementedError)
|
|
131
134
|
return self
|
|
132
135
|
|
|
133
|
-
def commit(self, target_file: str = None, formatter:
|
|
136
|
+
def commit(self, target_file: str | None = None, formatter: Callable | None = None):
|
|
134
137
|
if target_file and isinstance(target_file, str):
|
|
135
138
|
file_link = target_file
|
|
136
139
|
else:
|
|
@@ -141,7 +144,7 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
141
144
|
file_link = file_link[0]
|
|
142
145
|
else:
|
|
143
146
|
file_link = None # cannot commit to multiple files
|
|
144
|
-
|
|
147
|
+
UserMessage('Cannot commit to multiple files.', raise_with=Exception)
|
|
145
148
|
if file_link:
|
|
146
149
|
# if file extension is .py, then format code
|
|
147
150
|
format_ = formatter
|
|
@@ -150,10 +153,10 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
150
153
|
if formatter:
|
|
151
154
|
val = formatter(val)
|
|
152
155
|
# if file does not exist, create it
|
|
153
|
-
with open(
|
|
156
|
+
with Path(file_link).open('w') as file:
|
|
154
157
|
file.write(str(val))
|
|
155
158
|
else:
|
|
156
|
-
|
|
159
|
+
UserMessage('File link is not set or a set of files.', raise_with=Exception)
|
|
157
160
|
|
|
158
161
|
def save(self, path: str, replace: bool = False) -> Symbol:
|
|
159
162
|
return Symbol(self._memory).save(path, replace=replace)
|
|
@@ -161,64 +164,78 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
161
164
|
def build_tag(self, tag: str, query: str) -> str:
|
|
162
165
|
# get timestamp in string format
|
|
163
166
|
timestamp = datetime.now().strftime("%d/%m/%Y %H:%M:%S:%f")
|
|
164
|
-
return str(f"[{tag}{timestamp}]: <<<\n{
|
|
167
|
+
return str(f"[{tag}{timestamp}]: <<<\n{query!s}\n>>>\n")
|
|
165
168
|
|
|
166
169
|
def forward(self, query: str, *args, **kwargs):
|
|
167
|
-
|
|
168
|
-
dynamic_truncation_percentage = kwargs.get('truncation_percentage', self.truncation_percentage)
|
|
169
|
-
dynamic_truncation_type = kwargs.get('truncation_type', self.truncation_type)
|
|
170
|
-
kwargs = {**kwargs, 'truncation_percentage': dynamic_truncation_percentage, 'truncation_type': dynamic_truncation_type}
|
|
171
|
-
|
|
170
|
+
kwargs = self._apply_truncation_overrides(kwargs)
|
|
172
171
|
query = self._to_symbol(query)
|
|
173
|
-
memory =
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
172
|
+
memory = self._retrieve_index_memory(query, args, kwargs)
|
|
173
|
+
payload = self._build_payload(kwargs, memory)
|
|
174
|
+
res = self.recall(query, *args, payload=payload, **kwargs)
|
|
175
|
+
|
|
176
|
+
# if user is requesting to preview the response, then return only the preview result
|
|
177
|
+
if kwargs.get('preview'):
|
|
178
|
+
if self.auto_print:
|
|
179
|
+
UserMessage(str(res), style="text")
|
|
180
|
+
return res
|
|
181
|
+
|
|
182
|
+
### --- asses memory update --- ###
|
|
183
|
+
|
|
184
|
+
self._append_interaction_to_memory(query, res)
|
|
185
|
+
|
|
186
|
+
# WARN: DO NOT PROCESS THE RES BY REMOVING `<<<` AND `>>>` TAGS
|
|
187
|
+
|
|
188
|
+
if self.auto_print:
|
|
189
|
+
UserMessage(str(res), style="text")
|
|
190
|
+
return res
|
|
190
191
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
192
|
+
def _apply_truncation_overrides(self, kwargs: dict[str, Any]) -> dict[str, Any]:
|
|
193
|
+
dynamic_truncation_percentage = kwargs.get('truncation_percentage', self.truncation_percentage)
|
|
194
|
+
dynamic_truncation_type = kwargs.get('truncation_type', self.truncation_type)
|
|
195
|
+
return {
|
|
196
|
+
**kwargs,
|
|
197
|
+
'truncation_percentage': dynamic_truncation_percentage,
|
|
198
|
+
'truncation_type': dynamic_truncation_type,
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
def _retrieve_index_memory(self, query: Symbol, args: tuple[Any, ...], kwargs: dict[str, Any]):
|
|
202
|
+
if self.index is None:
|
|
203
|
+
return None
|
|
204
|
+
|
|
205
|
+
memory_split = self._memory.split(self.marker)
|
|
206
|
+
memory_shards = []
|
|
207
|
+
for shard in memory_split:
|
|
208
|
+
if shard.strip() == '':
|
|
209
|
+
continue
|
|
210
|
+
memory_shards.append(shard)
|
|
211
|
+
|
|
212
|
+
length_memory_shards = len(memory_shards)
|
|
213
|
+
if length_memory_shards > 5:
|
|
214
|
+
memory_shards = memory_shards[:2] + memory_shards[-3:]
|
|
215
|
+
elif length_memory_shards > 3:
|
|
216
|
+
retained = memory_shards[-(length_memory_shards - 2):]
|
|
217
|
+
memory_shards = memory_shards[:2] + retained
|
|
218
|
+
|
|
219
|
+
search_query = query | '\n' | '\n'.join(memory_shards)
|
|
220
|
+
if kwargs.get('use_seo_opt'):
|
|
221
|
+
search_query = self.seo_opt('[Query]:' | search_query)
|
|
222
|
+
memory = self.index(search_query, *args, **kwargs)
|
|
195
223
|
|
|
196
|
-
|
|
197
|
-
|
|
224
|
+
if 'raw_result' in kwargs:
|
|
225
|
+
UserMessage(str(memory), style="text")
|
|
226
|
+
return memory
|
|
198
227
|
|
|
228
|
+
def _build_payload(self, kwargs: dict[str, Any], memory) -> str:
|
|
199
229
|
payload = ''
|
|
200
|
-
# if payload is set, then add it to the memory
|
|
201
230
|
if 'payload' in kwargs:
|
|
202
|
-
payload
|
|
231
|
+
payload = f"[Conversation Payload]:\n{kwargs.pop('payload')}\n"
|
|
203
232
|
|
|
204
233
|
index_memory = ''
|
|
205
|
-
# if index is set, then add it to the memory
|
|
206
234
|
if memory:
|
|
207
235
|
index_memory = f'[Index Retrieval]:\n{str(memory)[:1500]}\n'
|
|
236
|
+
return f'{index_memory}{payload}'
|
|
208
237
|
|
|
209
|
-
|
|
210
|
-
# perform a recall function using the query
|
|
211
|
-
res = self.recall(query, *args, payload=payload, **kwargs)
|
|
212
|
-
|
|
213
|
-
# if user is requesting to preview the response, then return only the preview result
|
|
214
|
-
if 'preview' in kwargs and kwargs['preview']:
|
|
215
|
-
if self.auto_print:
|
|
216
|
-
print(res)
|
|
217
|
-
return res
|
|
218
|
-
|
|
219
|
-
### --- asses memory update --- ###
|
|
220
|
-
|
|
221
|
-
# append the bot prompt to the memory
|
|
238
|
+
def _append_interaction_to_memory(self, query: Symbol, res: Symbol) -> None:
|
|
222
239
|
prompt = self.build_tag(self.user_tag, query)
|
|
223
240
|
self.store(prompt)
|
|
224
241
|
|
|
@@ -226,12 +243,6 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
226
243
|
val = self.build_tag(self.bot_tag, res)
|
|
227
244
|
self.store(val)
|
|
228
245
|
|
|
229
|
-
# WARN: DO NOT PROCESS THE RES BY REMOVING `<<<` AND `>>>` TAGS
|
|
230
|
-
|
|
231
|
-
if self.auto_print:
|
|
232
|
-
print(res)
|
|
233
|
-
return res
|
|
234
|
-
|
|
235
246
|
|
|
236
247
|
RETRIEVAL_CONTEXT = """[Description]
|
|
237
248
|
This is a conversation between a retrieval augmented indexing program and a user. The system combines document retrieval with conversational AI to provide context-aware responses. It can:
|
|
@@ -269,23 +280,23 @@ Responses should be:
|
|
|
269
280
|
class RetrievalAugmentedConversation(Conversation):
|
|
270
281
|
def __init__(
|
|
271
282
|
self,
|
|
272
|
-
folder_path:
|
|
283
|
+
folder_path: str | None = None,
|
|
273
284
|
*,
|
|
274
|
-
index_name:
|
|
275
|
-
max_depth:
|
|
285
|
+
index_name: str | None = None,
|
|
286
|
+
max_depth: int | None = 0,
|
|
276
287
|
auto_print: bool = True,
|
|
277
288
|
top_k: int = 5,
|
|
278
|
-
formatter: Callable =
|
|
289
|
+
formatter: Callable = _DEFAULT_TEXT_CONTAINER_FORMATTER,
|
|
279
290
|
overwrite: bool = False,
|
|
280
291
|
truncation_percentage: float = 0.8,
|
|
281
292
|
truncation_type: str = 'head',
|
|
282
293
|
with_metadata: bool = False,
|
|
283
|
-
raw_result:
|
|
284
|
-
new_dim:
|
|
294
|
+
raw_result: bool | None = False,
|
|
295
|
+
new_dim: int | None = None,
|
|
285
296
|
**kwargs
|
|
286
297
|
):
|
|
287
298
|
|
|
288
|
-
super().__init__(auto_print=auto_print, truncation_percentage=truncation_percentage, truncation_type=truncation_type, with_metadata=with_metadata,
|
|
299
|
+
super().__init__(auto_print=auto_print, truncation_percentage=truncation_percentage, truncation_type=truncation_type, with_metadata=with_metadata, **kwargs)
|
|
289
300
|
|
|
290
301
|
self.retriever = DocumentRetriever(
|
|
291
302
|
source=folder_path,
|
|
@@ -323,7 +334,7 @@ class RetrievalAugmentedConversation(Conversation):
|
|
|
323
334
|
memory = self.index(query, *args, **kwargs)
|
|
324
335
|
|
|
325
336
|
if 'raw_result' in kwargs:
|
|
326
|
-
|
|
337
|
+
UserMessage(str(memory), style="text")
|
|
327
338
|
return memory
|
|
328
339
|
|
|
329
340
|
prompt = self.build_tag(self.user_tag, query)
|
|
@@ -338,5 +349,5 @@ class RetrievalAugmentedConversation(Conversation):
|
|
|
338
349
|
self.store(val)
|
|
339
350
|
|
|
340
351
|
if self.auto_print:
|
|
341
|
-
|
|
352
|
+
UserMessage(str(res), style="text")
|
|
342
353
|
return res
|
symai/extended/document.py
CHANGED
|
@@ -1,25 +1,31 @@
|
|
|
1
|
-
import
|
|
1
|
+
from collections.abc import Callable
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import TYPE_CHECKING, Union
|
|
4
4
|
|
|
5
5
|
from ..components import FileReader, Indexer
|
|
6
6
|
from ..formatter import ParagraphFormatter
|
|
7
7
|
from ..symbol import Expression, Symbol
|
|
8
|
+
from ..utils import UserMessage
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from ..backend.engines.files.engine_io import TextContainer
|
|
12
|
+
|
|
13
|
+
_DEFAULT_PARAGRAPH_FORMATTER = ParagraphFormatter()
|
|
8
14
|
|
|
9
15
|
|
|
10
16
|
class DocumentRetriever(Expression):
|
|
11
17
|
def __init__(
|
|
12
18
|
self,
|
|
13
|
-
source:
|
|
19
|
+
source: str | None = None,
|
|
14
20
|
*,
|
|
15
21
|
index_name: str = Indexer.DEFAULT,
|
|
16
22
|
top_k: int = 5,
|
|
17
23
|
max_depth: int = 1,
|
|
18
|
-
formatter: Callable =
|
|
24
|
+
formatter: Callable = _DEFAULT_PARAGRAPH_FORMATTER,
|
|
19
25
|
overwrite: bool = False,
|
|
20
26
|
with_metadata: bool = False,
|
|
21
|
-
raw_result:
|
|
22
|
-
new_dim:
|
|
27
|
+
raw_result: bool | None = False,
|
|
28
|
+
new_dim: int | None = None,
|
|
23
29
|
**kwargs
|
|
24
30
|
):
|
|
25
31
|
super().__init__(**kwargs)
|
|
@@ -42,14 +48,14 @@ class DocumentRetriever(Expression):
|
|
|
42
48
|
def forward(
|
|
43
49
|
self,
|
|
44
50
|
query: Symbol,
|
|
45
|
-
raw_result:
|
|
51
|
+
raw_result: bool | None = False,
|
|
46
52
|
) -> Symbol:
|
|
47
53
|
return self.index(
|
|
48
54
|
query,
|
|
49
55
|
raw_result=raw_result,
|
|
50
56
|
)
|
|
51
57
|
|
|
52
|
-
def insert(self, source:
|
|
58
|
+
def insert(self, source: str | Path, **kwargs):
|
|
53
59
|
# dynamically insert data into the index given a session
|
|
54
60
|
# the data can be:
|
|
55
61
|
# - a string (e.g. something that the user wants to insert)
|
|
@@ -60,7 +66,7 @@ class DocumentRetriever(Expression):
|
|
|
60
66
|
self.add(text, index_name=self.indexer.index_name, **kwargs)
|
|
61
67
|
self.config(None, save=True, index_name=self.indexer.index_name, **kwargs)
|
|
62
68
|
|
|
63
|
-
def parse_source(self, source: str, with_metadata: bool, max_depth: int, **kwargs) ->
|
|
69
|
+
def parse_source(self, source: str, with_metadata: bool, max_depth: int, **kwargs) -> list[Union[str, 'TextContainer']]:
|
|
64
70
|
maybe_path = Path(source)
|
|
65
71
|
if isinstance(source, str) and not (maybe_path.is_file() or maybe_path.is_dir()):
|
|
66
72
|
return Symbol(source).zip(new_dim=self.new_dim)
|
|
@@ -69,4 +75,5 @@ class DocumentRetriever(Expression):
|
|
|
69
75
|
return self.reader(files, with_metadata=with_metadata, **kwargs)
|
|
70
76
|
if maybe_path.is_file():
|
|
71
77
|
return self.reader(source, with_metadata=with_metadata, **kwargs)
|
|
72
|
-
|
|
78
|
+
UserMessage(f"Invalid source: {source}; must be a file, directory, or string", raise_with=ValueError)
|
|
79
|
+
return []
|
symai/extended/file_merger.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
|
|
3
4
|
from tqdm import tqdm
|
|
4
|
-
from typing import List
|
|
5
5
|
|
|
6
|
-
from ..symbol import Expression, Symbol
|
|
7
6
|
from ..components import FileReader
|
|
7
|
+
from ..symbol import Expression, Symbol
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class FileMerger(Expression):
|
|
@@ -12,8 +12,12 @@ class FileMerger(Expression):
|
|
|
12
12
|
Class to merge contents of multiple files into one, specified by their file endings and root path.
|
|
13
13
|
Files specified in the exclude list will not be included.
|
|
14
14
|
"""
|
|
15
|
-
def __init__(self, file_endings:
|
|
16
|
-
file_excludes:
|
|
15
|
+
def __init__(self, file_endings: list[str] | None = None,
|
|
16
|
+
file_excludes: list[str] | None = None, **kwargs):
|
|
17
|
+
if file_excludes is None:
|
|
18
|
+
file_excludes = ['__init__.py', '__pycache__', 'LICENSE', 'requirements.txt', 'environment.yaml', '.git']
|
|
19
|
+
if file_endings is None:
|
|
20
|
+
file_endings = ['.py', '.md', '.txt', '.sh', '.pdf', '.json', '.yaml', '.java', '.cpp', '.hpp', '.c', '.h', '.js', '.css', '.html', '.xml', '.csv', '.tsv', '.yml', '.rst', '.ipynb', '.tex', '.bib']
|
|
17
21
|
super().__init__(**kwargs)
|
|
18
22
|
self.file_endings = file_endings
|
|
19
23
|
self.file_excludes = file_excludes
|
|
@@ -31,29 +35,30 @@ class FileMerger(Expression):
|
|
|
31
35
|
|
|
32
36
|
# Implement recursive file search
|
|
33
37
|
# use tqdm for progress bar and description
|
|
34
|
-
tqdm_desc =
|
|
38
|
+
tqdm_desc = "Reading file: ..."
|
|
35
39
|
# use os.walk to recursively search for files in the root path
|
|
36
40
|
progress = tqdm(os.walk(root_path), desc=tqdm_desc)
|
|
37
41
|
|
|
38
|
-
for root,
|
|
42
|
+
for root, _dirs, files in progress:
|
|
39
43
|
for file in files:
|
|
40
|
-
file_path =
|
|
44
|
+
file_path = Path(root) / file
|
|
45
|
+
file_path_str = file_path.as_posix()
|
|
41
46
|
# Exclude files with the specified names in the path
|
|
42
|
-
if any(exclude in
|
|
47
|
+
if any(exclude in file_path_str for exclude in self.file_excludes):
|
|
43
48
|
continue
|
|
44
49
|
|
|
45
50
|
# Look only for files with the specified endings
|
|
46
51
|
if file.endswith(tuple(self.file_endings)):
|
|
47
52
|
# Read in the file using the FileReader
|
|
48
|
-
file_content = self.reader(
|
|
53
|
+
file_content = self.reader(file_path_str, **kwargs).value
|
|
49
54
|
|
|
50
55
|
# escape file name spaces
|
|
51
|
-
|
|
56
|
+
file_path_escaped = file_path_str.replace(" ", "\\ ")
|
|
52
57
|
|
|
53
58
|
# Append start and end markers for each file
|
|
54
|
-
file_content = f"# ----[FILE_START]<PART1/1>{
|
|
59
|
+
file_content = f"# ----[FILE_START]<PART1/1>{file_path_escaped}[FILE_CONTENT]:\n" + \
|
|
55
60
|
file_content + \
|
|
56
|
-
f"\n# ----[FILE_END]{
|
|
61
|
+
f"\n# ----[FILE_END]{file_path_escaped}\n"
|
|
57
62
|
|
|
58
63
|
# Merge the file contents
|
|
59
64
|
merged_file += file_content
|
|
@@ -63,4 +68,4 @@ class FileMerger(Expression):
|
|
|
63
68
|
progress.set_description(tqdm_desc)
|
|
64
69
|
|
|
65
70
|
# Return the merged file as a Symbol
|
|
66
|
-
return self._to_symbol(merged_file)
|
|
71
|
+
return self._to_symbol(merged_file)
|
symai/extended/graph.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
1
2
|
from multiprocessing import Pool
|
|
2
|
-
from typing import Callable
|
|
3
3
|
|
|
4
4
|
from .. import core
|
|
5
5
|
from ..formatter import SentenceFormatter
|
|
@@ -7,6 +7,9 @@ from ..post_processors import StripPostProcessor
|
|
|
7
7
|
from ..pre_processors import PreProcessor
|
|
8
8
|
from ..prompts import Prompt
|
|
9
9
|
from ..symbol import Expression, Symbol
|
|
10
|
+
from ..utils import UserMessage
|
|
11
|
+
|
|
12
|
+
_DEFAULT_SENTENCE_FORMATTER = SentenceFormatter()
|
|
10
13
|
|
|
11
14
|
GRAPH_DESCRIPTION = """[Description]
|
|
12
15
|
Build source-target relationship pairs for named entities based for the [DATA] section. The [DATA] section contains one sentence.
|
|
@@ -18,7 +21,7 @@ If more than one entity pair is extracted from the same sentence, then the CSV f
|
|
|
18
21
|
|
|
19
22
|
class GraphPreProcessor(PreProcessor):
|
|
20
23
|
def __call__(self, argument):
|
|
21
|
-
return '$> {
|
|
24
|
+
return f'$> {argument.args[0]!s} =>'
|
|
22
25
|
|
|
23
26
|
|
|
24
27
|
class Graph(Expression):
|
|
@@ -26,14 +29,14 @@ class Graph(Expression):
|
|
|
26
29
|
def static_context(self) -> str:
|
|
27
30
|
return GRAPH_DESCRIPTION
|
|
28
31
|
|
|
29
|
-
def __init__(self, formatter: Callable =
|
|
32
|
+
def __init__(self, formatter: Callable = _DEFAULT_SENTENCE_FORMATTER, n_workers: int = 1, verbose: bool = False, **kwargs):
|
|
30
33
|
super().__init__(**kwargs)
|
|
31
34
|
self.formatter = formatter
|
|
32
35
|
self.n_workers = n_workers
|
|
33
36
|
self.sym_return_type = Graph
|
|
34
37
|
self.verbose = verbose
|
|
35
38
|
|
|
36
|
-
def process_symbol(self, s, *
|
|
39
|
+
def process_symbol(self, s, *_args, **kwargs):
|
|
37
40
|
res = ''
|
|
38
41
|
|
|
39
42
|
@core.few_shot(prompt="Extract relationships between entities:\n",
|
|
@@ -50,26 +53,28 @@ class Graph(Expression):
|
|
|
50
53
|
pass
|
|
51
54
|
|
|
52
55
|
if len(str(s)) > 0:
|
|
53
|
-
if self.verbose:
|
|
56
|
+
if self.verbose:
|
|
57
|
+
UserMessage(str(s))
|
|
54
58
|
r = _func(self, s)
|
|
55
59
|
rec = str(r)
|
|
56
60
|
lines = rec.split('\n')
|
|
57
|
-
for
|
|
58
|
-
|
|
59
|
-
if len(
|
|
60
|
-
csv =
|
|
61
|
+
for line in lines:
|
|
62
|
+
stripped_line = line.strip()
|
|
63
|
+
if len(stripped_line) > 0:
|
|
64
|
+
csv = stripped_line.split(',')
|
|
61
65
|
try:
|
|
62
66
|
if len(csv) == 3 and \
|
|
63
67
|
csv[0].strip() != '' and \
|
|
64
68
|
csv[1].strip() != '' and \
|
|
65
69
|
int(csv[2].strip()) > 0:
|
|
66
|
-
res +=
|
|
70
|
+
res += stripped_line + '\n'
|
|
67
71
|
except Exception as e:
|
|
68
|
-
if self.verbose:
|
|
72
|
+
if self.verbose:
|
|
73
|
+
UserMessage(str(e))
|
|
69
74
|
pass
|
|
70
75
|
return res
|
|
71
76
|
|
|
72
|
-
def forward(self, sym: Symbol, **
|
|
77
|
+
def forward(self, sym: Symbol, **_kwargs) -> Symbol:
|
|
73
78
|
res = 'source,target,value\n'
|
|
74
79
|
sym_list = self.formatter(sym).value
|
|
75
80
|
if self.n_workers == 1:
|
|
@@ -82,4 +87,4 @@ class Graph(Expression):
|
|
|
82
87
|
res += r
|
|
83
88
|
return res
|
|
84
89
|
|
|
85
|
-
|
|
90
|
+
_DEFAULT_SENTENCE_FORMATTER = SentenceFormatter()
|
|
@@ -76,13 +76,11 @@ class HtmlStyleTemplate(Expression):
|
|
|
76
76
|
These strings are combined into a single `Symbol` object which is then clustered.
|
|
77
77
|
Finally, the `render` method applies the `html_template` to the clustered `Symbol` and returns the result.
|
|
78
78
|
"""
|
|
79
|
-
if
|
|
79
|
+
if not isinstance(sym, Symbol):
|
|
80
80
|
sym = Symbol(sym)
|
|
81
81
|
html_data = list(self.html_stream(sym, **kwargs))
|
|
82
82
|
style_data = [str(self.style_template(html,
|
|
83
83
|
template=HTML_TEMPLATE_STYLE,
|
|
84
84
|
placeholder='{{placeholder}}',
|
|
85
85
|
**kwargs)) for html in html_data]
|
|
86
|
-
|
|
87
|
-
res = Symbol(res)
|
|
88
|
-
return res
|
|
86
|
+
return Symbol('\n'.join(style_data))
|