symbolicai 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +198 -134
- symai/backend/base.py +51 -51
- symai/backend/engines/drawing/engine_bfl.py +33 -33
- symai/backend/engines/drawing/engine_gpt_image.py +4 -10
- symai/backend/engines/embedding/engine_llama_cpp.py +50 -35
- symai/backend/engines/embedding/engine_openai.py +22 -16
- symai/backend/engines/execute/engine_python.py +16 -16
- symai/backend/engines/files/engine_io.py +51 -49
- symai/backend/engines/imagecaptioning/engine_blip2.py +27 -23
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +53 -46
- symai/backend/engines/index/engine_pinecone.py +116 -88
- symai/backend/engines/index/engine_qdrant.py +1011 -0
- symai/backend/engines/index/engine_vectordb.py +78 -52
- symai/backend/engines/lean/engine_lean4.py +65 -25
- symai/backend/engines/neurosymbolic/__init__.py +35 -28
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +137 -135
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +145 -152
- symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +75 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +199 -155
- symai/backend/engines/neurosymbolic/engine_groq.py +106 -72
- symai/backend/engines/neurosymbolic/engine_huggingface.py +100 -67
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +121 -93
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +213 -132
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +180 -137
- symai/backend/engines/ocr/engine_apilayer.py +18 -20
- symai/backend/engines/output/engine_stdout.py +9 -9
- symai/backend/engines/{webscraping → scrape}/engine_requests.py +25 -11
- symai/backend/engines/search/engine_openai.py +95 -83
- symai/backend/engines/search/engine_parallel.py +665 -0
- symai/backend/engines/search/engine_perplexity.py +40 -41
- symai/backend/engines/search/engine_serpapi.py +33 -28
- symai/backend/engines/speech_to_text/engine_local_whisper.py +37 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +14 -8
- symai/backend/engines/text_to_speech/engine_openai.py +15 -19
- symai/backend/engines/text_vision/engine_clip.py +34 -28
- symai/backend/engines/userinput/engine_console.py +3 -4
- symai/backend/mixin/__init__.py +4 -0
- symai/backend/mixin/anthropic.py +48 -40
- symai/backend/mixin/cerebras.py +9 -0
- symai/backend/mixin/deepseek.py +4 -5
- symai/backend/mixin/google.py +5 -4
- symai/backend/mixin/groq.py +2 -4
- symai/backend/mixin/openai.py +132 -110
- symai/backend/settings.py +14 -14
- symai/chat.py +164 -94
- symai/collect/dynamic.py +13 -11
- symai/collect/pipeline.py +39 -31
- symai/collect/stats.py +109 -69
- symai/components.py +578 -238
- symai/constraints.py +14 -5
- symai/core.py +1495 -1210
- symai/core_ext.py +55 -50
- symai/endpoints/api.py +113 -58
- symai/extended/api_builder.py +22 -17
- symai/extended/arxiv_pdf_parser.py +13 -5
- symai/extended/bibtex_parser.py +8 -4
- symai/extended/conversation.py +88 -69
- symai/extended/document.py +40 -27
- symai/extended/file_merger.py +45 -7
- symai/extended/graph.py +38 -24
- symai/extended/html_style_template.py +17 -11
- symai/extended/interfaces/blip_2.py +1 -1
- symai/extended/interfaces/clip.py +4 -2
- symai/extended/interfaces/console.py +5 -3
- symai/extended/interfaces/dall_e.py +3 -1
- symai/extended/interfaces/file.py +2 -0
- symai/extended/interfaces/flux.py +3 -1
- symai/extended/interfaces/gpt_image.py +15 -6
- symai/extended/interfaces/input.py +2 -1
- symai/extended/interfaces/llava.py +1 -1
- symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +3 -2
- symai/extended/interfaces/naive_vectordb.py +2 -2
- symai/extended/interfaces/ocr.py +4 -2
- symai/extended/interfaces/openai_search.py +2 -0
- symai/extended/interfaces/parallel.py +30 -0
- symai/extended/interfaces/perplexity.py +2 -0
- symai/extended/interfaces/pinecone.py +6 -4
- symai/extended/interfaces/python.py +2 -0
- symai/extended/interfaces/serpapi.py +2 -0
- symai/extended/interfaces/terminal.py +0 -1
- symai/extended/interfaces/tts.py +2 -1
- symai/extended/interfaces/whisper.py +2 -1
- symai/extended/interfaces/wolframalpha.py +1 -0
- symai/extended/metrics/__init__.py +1 -1
- symai/extended/metrics/similarity.py +5 -2
- symai/extended/os_command.py +31 -22
- symai/extended/packages/symdev.py +39 -34
- symai/extended/packages/sympkg.py +30 -27
- symai/extended/packages/symrun.py +46 -35
- symai/extended/repo_cloner.py +10 -9
- symai/extended/seo_query_optimizer.py +15 -12
- symai/extended/solver.py +104 -76
- symai/extended/summarizer.py +8 -7
- symai/extended/taypan_interpreter.py +10 -9
- symai/extended/vectordb.py +28 -15
- symai/formatter/formatter.py +39 -31
- symai/formatter/regex.py +46 -44
- symai/functional.py +184 -86
- symai/imports.py +85 -51
- symai/interfaces.py +1 -1
- symai/memory.py +33 -24
- symai/menu/screen.py +28 -19
- symai/misc/console.py +27 -27
- symai/misc/loader.py +4 -3
- symai/models/base.py +147 -76
- symai/models/errors.py +1 -1
- symai/ops/__init__.py +1 -1
- symai/ops/measures.py +17 -14
- symai/ops/primitives.py +933 -635
- symai/post_processors.py +28 -24
- symai/pre_processors.py +58 -52
- symai/processor.py +15 -9
- symai/prompts.py +714 -649
- symai/server/huggingface_server.py +115 -32
- symai/server/llama_cpp_server.py +14 -6
- symai/server/qdrant_server.py +206 -0
- symai/shell.py +98 -39
- symai/shellsv.py +307 -223
- symai/strategy.py +135 -81
- symai/symbol.py +276 -225
- symai/utils.py +62 -46
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.1.dist-info}/METADATA +19 -9
- symbolicai-1.1.1.dist-info/RECORD +169 -0
- symbolicai-1.0.0.dist-info/RECORD +0 -163
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.1.dist-info}/WHEEL +0 -0
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.1.dist-info}/entry_points.txt +0 -0
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.1.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.1.dist-info}/top_level.txt +0 -0
symai/extended/api_builder.py
CHANGED
|
@@ -64,7 +64,7 @@ res = run(value) # [MANAGED] must contain this line, do not change
|
|
|
64
64
|
|
|
65
65
|
class APIBuilderPreProcessor(PreProcessor):
|
|
66
66
|
def __call__(self, argument):
|
|
67
|
-
return f
|
|
67
|
+
return f"$> {argument.args[0]!s} =>"
|
|
68
68
|
|
|
69
69
|
|
|
70
70
|
class APIBuilder(Expression):
|
|
@@ -77,9 +77,12 @@ class APIBuilder(Expression):
|
|
|
77
77
|
self.sym_return_type = APIBuilder
|
|
78
78
|
|
|
79
79
|
def forward(self, sym: Symbol, **kwargs) -> Symbol:
|
|
80
|
-
@core.zero_shot(
|
|
81
|
-
|
|
82
|
-
|
|
80
|
+
@core.zero_shot(
|
|
81
|
+
prompt="Build the API call code:\n",
|
|
82
|
+
pre_processors=[APIBuilderPreProcessor()],
|
|
83
|
+
post_processors=[CodeExtractPostProcessor()],
|
|
84
|
+
**kwargs,
|
|
85
|
+
)
|
|
83
86
|
def _func(_, text) -> str:
|
|
84
87
|
pass
|
|
85
88
|
|
|
@@ -96,18 +99,20 @@ class StackTraceRetryExecutor(Expression):
|
|
|
96
99
|
def forward(self, code: Symbol, request: Symbol, **kwargs) -> Symbol:
|
|
97
100
|
code = str(code)
|
|
98
101
|
# Set value that gets passed on to the 'run' function in the generated code
|
|
99
|
-
value = request.value
|
|
102
|
+
value = request.value # do not remove this line
|
|
100
103
|
# Create the 'run' function
|
|
101
104
|
self._runnable = self.executor(code, locals=locals().copy(), globals=globals().copy())
|
|
102
|
-
result = self._runnable[
|
|
105
|
+
result = self._runnable["locals"]["run"](value)
|
|
103
106
|
retry = 0
|
|
104
107
|
# Retry if there is a 'Traceback' in the result
|
|
105
|
-
while
|
|
106
|
-
self._runnable = self.executor(
|
|
107
|
-
|
|
108
|
+
while "Traceback" in result and retry <= self.max_retries:
|
|
109
|
+
self._runnable = self.executor(
|
|
110
|
+
code, payload=result, locals=locals().copy(), globals=globals().copy(), **kwargs
|
|
111
|
+
)
|
|
112
|
+
result = self._runnable["locals"]["run"](value)
|
|
108
113
|
retry += 1
|
|
109
|
-
if
|
|
110
|
-
result = self._runnable[
|
|
114
|
+
if "locals_res" in self._runnable:
|
|
115
|
+
result = self._runnable["locals_res"]
|
|
111
116
|
return result
|
|
112
117
|
|
|
113
118
|
|
|
@@ -129,14 +134,14 @@ class APIExecutor(Expression):
|
|
|
129
134
|
def forward(self, request: Symbol, **_kwargs) -> Symbol:
|
|
130
135
|
self._request = self._to_symbol(request)
|
|
131
136
|
if self._verbose:
|
|
132
|
-
UserMessage(f
|
|
137
|
+
UserMessage(f"[REQUEST] {self._request}")
|
|
133
138
|
# Generate the code to implement the API call
|
|
134
|
-
self._code
|
|
139
|
+
self._code = self.builder(self._request)
|
|
135
140
|
if self._verbose:
|
|
136
|
-
UserMessage(f
|
|
141
|
+
UserMessage(f"[GENERATED_CODE] {self._code}")
|
|
137
142
|
# Execute the code to define the 'run' function
|
|
138
|
-
self._result
|
|
143
|
+
self._result = self.executor(self._code, request=self._request)
|
|
139
144
|
if self._verbose:
|
|
140
|
-
UserMessage(f
|
|
141
|
-
self._value
|
|
145
|
+
UserMessage(f"[RESULT]: {self._result}")
|
|
146
|
+
self._value = self._result
|
|
142
147
|
return self
|
|
@@ -12,7 +12,9 @@ from .file_merger import FileMerger
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class ArxivPdfParser(Expression):
|
|
15
|
-
def __init__(
|
|
15
|
+
def __init__(
|
|
16
|
+
self, url_pattern: str = r"https://arxiv.org/(?:pdf|abs)/(\d+.\d+)(?:\.pdf)?", **kwargs
|
|
17
|
+
):
|
|
16
18
|
super().__init__(**kwargs)
|
|
17
19
|
self.url_pattern = url_pattern
|
|
18
20
|
self.merger = FileMerger()
|
|
@@ -22,7 +24,11 @@ class ArxivPdfParser(Expression):
|
|
|
22
24
|
urls = re.findall(self.url_pattern, str(data))
|
|
23
25
|
|
|
24
26
|
# Convert all urls to pdf urls
|
|
25
|
-
pdf_urls = [
|
|
27
|
+
pdf_urls = [
|
|
28
|
+
"https://arxiv.org/pdf/"
|
|
29
|
+
+ (f"{url.split('/')[-1]}.pdf" if "pdf" not in url else {url.split("/")[-1]})
|
|
30
|
+
for url in urls
|
|
31
|
+
]
|
|
26
32
|
|
|
27
33
|
# Create temporary folder in the home directory
|
|
28
34
|
output_path = HOME_PATH / "temp" / "downloads"
|
|
@@ -31,7 +37,9 @@ class ArxivPdfParser(Expression):
|
|
|
31
37
|
pdf_files = []
|
|
32
38
|
with ThreadPoolExecutor() as executor:
|
|
33
39
|
# Download all pdfs in parallel
|
|
34
|
-
future_to_url = {
|
|
40
|
+
future_to_url = {
|
|
41
|
+
executor.submit(self.download_pdf, url, output_path): url for url in pdf_urls
|
|
42
|
+
}
|
|
35
43
|
for future in as_completed(future_to_url):
|
|
36
44
|
url = future_to_url[future]
|
|
37
45
|
try:
|
|
@@ -56,7 +64,7 @@ class ArxivPdfParser(Expression):
|
|
|
56
64
|
def download_pdf(self, url, output_path):
|
|
57
65
|
# Download pdfs
|
|
58
66
|
response = requests.get(url)
|
|
59
|
-
file_path = Path(output_path) / f
|
|
60
|
-
with file_path.open(
|
|
67
|
+
file_path = Path(output_path) / f"{url.split('/')[-1]}"
|
|
68
|
+
with file_path.open("wb") as f:
|
|
61
69
|
f.write(response.content)
|
|
62
70
|
return str(file_path)
|
symai/extended/bibtex_parser.py
CHANGED
|
@@ -68,7 +68,7 @@ Multimodal Few-Shot Learning with Frozen Language Models Maria Tsimpoukelli
|
|
|
68
68
|
|
|
69
69
|
class BibTexPreProcessor(PreProcessor):
|
|
70
70
|
def __call__(self, argument):
|
|
71
|
-
return f
|
|
71
|
+
return f">>>\n{argument.args[0]!s}\n\n<<<\n"
|
|
72
72
|
|
|
73
73
|
|
|
74
74
|
class BibTexParser(Expression):
|
|
@@ -81,9 +81,13 @@ class BibTexParser(Expression):
|
|
|
81
81
|
self.sym_return_type = BibTexParser
|
|
82
82
|
|
|
83
83
|
def forward(self, sym: Symbol, **kwargs) -> Symbol:
|
|
84
|
-
@core.zero_shot(
|
|
85
|
-
|
|
86
|
-
|
|
84
|
+
@core.zero_shot(
|
|
85
|
+
prompt="Create bibtex entries:\n",
|
|
86
|
+
pre_processors=[BibTexPreProcessor()],
|
|
87
|
+
post_processors=[CodeExtractPostProcessor()],
|
|
88
|
+
**kwargs,
|
|
89
|
+
)
|
|
87
90
|
def _func(_, text) -> str:
|
|
88
91
|
pass
|
|
92
|
+
|
|
89
93
|
return _func(self, sym)
|
symai/extended/conversation.py
CHANGED
|
@@ -19,22 +19,25 @@ _DEFAULT_TEXT_CONTAINER_FORMATTER = TextContainerFormatter(text_split=4)
|
|
|
19
19
|
class CodeFormatter:
|
|
20
20
|
def __call__(self, value: str, *_args: Any, **_kwds: Any) -> Any:
|
|
21
21
|
# extract code from chat conversations or ```<language>\n{code}\n``` blocks
|
|
22
|
-
return Symbol(value).extract(
|
|
22
|
+
return Symbol(value).extract(
|
|
23
|
+
"Only extract code without ``` block markers or chat conversations"
|
|
24
|
+
)
|
|
23
25
|
|
|
24
26
|
|
|
25
27
|
class Conversation(SlidingWindowStringConcatMemory):
|
|
26
28
|
def __init__(
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
29
|
+
self,
|
|
30
|
+
init: str | None = None,
|
|
31
|
+
file_link: list[str] | None = None,
|
|
32
|
+
url_link: list[str] | None = None,
|
|
33
|
+
index_name: str | None = None,
|
|
34
|
+
auto_print: bool = True,
|
|
35
|
+
truncation_percentage: float = 0.8,
|
|
36
|
+
truncation_type: str = "head",
|
|
37
|
+
with_metadata: bool = False,
|
|
38
|
+
*args,
|
|
39
|
+
**kwargs,
|
|
40
|
+
):
|
|
38
41
|
super().__init__(*args, **kwargs)
|
|
39
42
|
self.truncation_percentage = truncation_percentage
|
|
40
43
|
self.truncation_type = truncation_type
|
|
@@ -48,9 +51,9 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
48
51
|
self.index_name = index_name
|
|
49
52
|
self.seo_opt = SEOQueryOptimizer()
|
|
50
53
|
self.reader = FileReader(with_metadata=with_metadata)
|
|
51
|
-
self.scraper = Interface(
|
|
52
|
-
self.user_tag =
|
|
53
|
-
self.bot_tag =
|
|
54
|
+
self.scraper = Interface("naive_scrape")
|
|
55
|
+
self.user_tag = "USER::"
|
|
56
|
+
self.bot_tag = "ASSISTANT::"
|
|
54
57
|
|
|
55
58
|
if init is not None:
|
|
56
59
|
self.store_system_message(init, *args, **kwargs)
|
|
@@ -63,14 +66,16 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
63
66
|
self.indexer = None
|
|
64
67
|
self.index = None
|
|
65
68
|
if index_name is not None:
|
|
66
|
-
UserMessage(
|
|
69
|
+
UserMessage(
|
|
70
|
+
"Index not supported for conversation class.", raise_with=NotImplementedError
|
|
71
|
+
)
|
|
67
72
|
|
|
68
73
|
def __getstate__(self):
|
|
69
74
|
state = super().__getstate__().copy()
|
|
70
|
-
state.pop(
|
|
71
|
-
state.pop(
|
|
72
|
-
state.pop(
|
|
73
|
-
state.pop(
|
|
75
|
+
state.pop("seo_opt", None)
|
|
76
|
+
state.pop("indexer", None)
|
|
77
|
+
state.pop("index", None)
|
|
78
|
+
state.pop("reader", None)
|
|
74
79
|
return state
|
|
75
80
|
|
|
76
81
|
def __setstate__(self, state):
|
|
@@ -78,7 +83,9 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
78
83
|
self.seo_opt = SEOQueryOptimizer()
|
|
79
84
|
self.reader = FileReader()
|
|
80
85
|
if self.index_name is not None:
|
|
81
|
-
UserMessage(
|
|
86
|
+
UserMessage(
|
|
87
|
+
"Index not supported for conversation class.", raise_with=NotImplementedError
|
|
88
|
+
)
|
|
82
89
|
|
|
83
90
|
def store_system_message(self, message: str, *_args, **_kwargs):
|
|
84
91
|
val = f"[SYSTEM_INSTRUCTION::]: <<<\n{message!s}\n>>>\n"
|
|
@@ -100,7 +107,7 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
100
107
|
path_obj = Path(file_path)
|
|
101
108
|
path_obj.parent.mkdir(parents=True, exist_ok=True)
|
|
102
109
|
# Save the conversation object as a pickle file
|
|
103
|
-
with path_obj.open(
|
|
110
|
+
with path_obj.open("wb") as handle:
|
|
104
111
|
pickle.dump(conversation, handle, protocol=pickle.HIGHEST_PROTOCOL)
|
|
105
112
|
|
|
106
113
|
def load_conversation_state(self, path: str) -> "Conversation":
|
|
@@ -110,7 +117,7 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
110
117
|
if path_obj.stat().st_size <= 0:
|
|
111
118
|
UserMessage("File is empty.", raise_with=Exception)
|
|
112
119
|
# Load the conversation object from a pickle file
|
|
113
|
-
with path_obj.open(
|
|
120
|
+
with path_obj.open("rb") as handle:
|
|
114
121
|
conversation_state = pickle.load(handle)
|
|
115
122
|
else:
|
|
116
123
|
UserMessage("File does not exist or is empty.", raise_with=Exception)
|
|
@@ -123,14 +130,16 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
123
130
|
self._memory = conversation_state._memory
|
|
124
131
|
self.truncation_percentage = conversation_state.truncation_percentage
|
|
125
132
|
self.truncation_type = conversation_state.truncation_type
|
|
126
|
-
self.auto_print
|
|
133
|
+
self.auto_print = conversation_state.auto_print
|
|
127
134
|
self.file_link = conversation_state.file_link
|
|
128
135
|
self.url_link = conversation_state.url_link
|
|
129
|
-
self.index_name
|
|
136
|
+
self.index_name = conversation_state.index_name
|
|
130
137
|
self.seo_opt = SEOQueryOptimizer()
|
|
131
138
|
self.reader = FileReader()
|
|
132
139
|
if self.index_name is not None:
|
|
133
|
-
UserMessage(
|
|
140
|
+
UserMessage(
|
|
141
|
+
"Index not supported for conversation class.", raise_with=NotImplementedError
|
|
142
|
+
)
|
|
134
143
|
return self
|
|
135
144
|
|
|
136
145
|
def commit(self, target_file: str | None = None, formatter: Callable | None = None):
|
|
@@ -143,20 +152,22 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
143
152
|
elif isinstance(file_link, list) and len(file_link) == 1:
|
|
144
153
|
file_link = file_link[0]
|
|
145
154
|
else:
|
|
146
|
-
file_link = None
|
|
147
|
-
UserMessage(
|
|
155
|
+
file_link = None # cannot commit to multiple files
|
|
156
|
+
UserMessage("Cannot commit to multiple files.", raise_with=Exception)
|
|
148
157
|
if file_link:
|
|
149
158
|
# if file extension is .py, then format code
|
|
150
159
|
format_ = formatter
|
|
151
|
-
formatter =
|
|
160
|
+
formatter = (
|
|
161
|
+
CodeFormatter() if format_ is None and file_link.endswith(".py") else formatter
|
|
162
|
+
)
|
|
152
163
|
val = self.value
|
|
153
164
|
if formatter:
|
|
154
165
|
val = formatter(val)
|
|
155
166
|
# if file does not exist, create it
|
|
156
|
-
with Path(file_link).open(
|
|
167
|
+
with Path(file_link).open("w") as file:
|
|
157
168
|
file.write(str(val))
|
|
158
169
|
else:
|
|
159
|
-
UserMessage(
|
|
170
|
+
UserMessage("File link is not set or a set of files.", raise_with=Exception)
|
|
160
171
|
|
|
161
172
|
def save(self, path: str, replace: bool = False) -> Symbol:
|
|
162
173
|
return Symbol(self._memory).save(path, replace=replace)
|
|
@@ -174,7 +185,7 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
174
185
|
res = self.recall(query, *args, payload=payload, **kwargs)
|
|
175
186
|
|
|
176
187
|
# if user is requesting to preview the response, then return only the preview result
|
|
177
|
-
if kwargs.get(
|
|
188
|
+
if kwargs.get("preview"):
|
|
178
189
|
if self.auto_print:
|
|
179
190
|
UserMessage(str(res), style="text")
|
|
180
191
|
return res
|
|
@@ -190,12 +201,14 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
190
201
|
return res
|
|
191
202
|
|
|
192
203
|
def _apply_truncation_overrides(self, kwargs: dict[str, Any]) -> dict[str, Any]:
|
|
193
|
-
dynamic_truncation_percentage = kwargs.get(
|
|
194
|
-
|
|
204
|
+
dynamic_truncation_percentage = kwargs.get(
|
|
205
|
+
"truncation_percentage", self.truncation_percentage
|
|
206
|
+
)
|
|
207
|
+
dynamic_truncation_type = kwargs.get("truncation_type", self.truncation_type)
|
|
195
208
|
return {
|
|
196
209
|
**kwargs,
|
|
197
|
-
|
|
198
|
-
|
|
210
|
+
"truncation_percentage": dynamic_truncation_percentage,
|
|
211
|
+
"truncation_type": dynamic_truncation_type,
|
|
199
212
|
}
|
|
200
213
|
|
|
201
214
|
def _retrieve_index_memory(self, query: Symbol, args: tuple[Any, ...], kwargs: dict[str, Any]):
|
|
@@ -205,7 +218,7 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
205
218
|
memory_split = self._memory.split(self.marker)
|
|
206
219
|
memory_shards = []
|
|
207
220
|
for shard in memory_split:
|
|
208
|
-
if shard.strip() ==
|
|
221
|
+
if shard.strip() == "":
|
|
209
222
|
continue
|
|
210
223
|
memory_shards.append(shard)
|
|
211
224
|
|
|
@@ -213,33 +226,33 @@ class Conversation(SlidingWindowStringConcatMemory):
|
|
|
213
226
|
if length_memory_shards > 5:
|
|
214
227
|
memory_shards = memory_shards[:2] + memory_shards[-3:]
|
|
215
228
|
elif length_memory_shards > 3:
|
|
216
|
-
retained = memory_shards[-(length_memory_shards - 2):]
|
|
229
|
+
retained = memory_shards[-(length_memory_shards - 2) :]
|
|
217
230
|
memory_shards = memory_shards[:2] + retained
|
|
218
231
|
|
|
219
|
-
search_query = query |
|
|
220
|
-
if kwargs.get(
|
|
221
|
-
search_query = self.seo_opt(
|
|
232
|
+
search_query = query | "\n" | "\n".join(memory_shards)
|
|
233
|
+
if kwargs.get("use_seo_opt"):
|
|
234
|
+
search_query = self.seo_opt("[Query]:" | search_query)
|
|
222
235
|
memory = self.index(search_query, *args, **kwargs)
|
|
223
236
|
|
|
224
|
-
if
|
|
237
|
+
if "raw_result" in kwargs:
|
|
225
238
|
UserMessage(str(memory), style="text")
|
|
226
239
|
return memory
|
|
227
240
|
|
|
228
241
|
def _build_payload(self, kwargs: dict[str, Any], memory) -> str:
|
|
229
|
-
payload =
|
|
230
|
-
if
|
|
242
|
+
payload = ""
|
|
243
|
+
if "payload" in kwargs:
|
|
231
244
|
payload = f"[Conversation Payload]:\n{kwargs.pop('payload')}\n"
|
|
232
245
|
|
|
233
|
-
index_memory =
|
|
246
|
+
index_memory = ""
|
|
234
247
|
if memory:
|
|
235
|
-
index_memory = f
|
|
236
|
-
return f
|
|
248
|
+
index_memory = f"[Index Retrieval]:\n{str(memory)[:1500]}\n"
|
|
249
|
+
return f"{index_memory}{payload}"
|
|
237
250
|
|
|
238
251
|
def _append_interaction_to_memory(self, query: Symbol, res: Symbol) -> None:
|
|
239
252
|
prompt = self.build_tag(self.user_tag, query)
|
|
240
253
|
self.store(prompt)
|
|
241
254
|
|
|
242
|
-
self._value = res.value
|
|
255
|
+
self._value = res.value # save last response
|
|
243
256
|
val = self.build_tag(self.bot_tag, res)
|
|
244
257
|
self.store(val)
|
|
245
258
|
|
|
@@ -276,27 +289,33 @@ Responses should be:
|
|
|
276
289
|
- Referenced to source when applicable
|
|
277
290
|
"""
|
|
278
291
|
|
|
292
|
+
|
|
279
293
|
@deprecated("Use `Conversation` instead for now. This will be removed/fixed in the future.")
|
|
280
294
|
class RetrievalAugmentedConversation(Conversation):
|
|
281
295
|
def __init__(
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
296
|
+
self,
|
|
297
|
+
folder_path: str | None = None,
|
|
298
|
+
*,
|
|
299
|
+
index_name: str | None = None,
|
|
300
|
+
max_depth: int | None = 0,
|
|
301
|
+
auto_print: bool = True,
|
|
302
|
+
top_k: int = 5,
|
|
303
|
+
formatter: Callable = _DEFAULT_TEXT_CONTAINER_FORMATTER,
|
|
304
|
+
overwrite: bool = False,
|
|
305
|
+
truncation_percentage: float = 0.8,
|
|
306
|
+
truncation_type: str = "head",
|
|
307
|
+
with_metadata: bool = False,
|
|
308
|
+
raw_result: bool | None = False,
|
|
309
|
+
new_dim: int | None = None,
|
|
310
|
+
**kwargs,
|
|
311
|
+
):
|
|
312
|
+
super().__init__(
|
|
313
|
+
auto_print=auto_print,
|
|
314
|
+
truncation_percentage=truncation_percentage,
|
|
315
|
+
truncation_type=truncation_type,
|
|
316
|
+
with_metadata=with_metadata,
|
|
317
|
+
**kwargs,
|
|
318
|
+
)
|
|
300
319
|
|
|
301
320
|
self.retriever = DocumentRetriever(
|
|
302
321
|
source=folder_path,
|
|
@@ -308,7 +327,7 @@ class RetrievalAugmentedConversation(Conversation):
|
|
|
308
327
|
with_metadata=with_metadata,
|
|
309
328
|
raw_result=raw_result,
|
|
310
329
|
new_dim=new_dim,
|
|
311
|
-
**kwargs
|
|
330
|
+
**kwargs,
|
|
312
331
|
)
|
|
313
332
|
|
|
314
333
|
self.index = self.retriever.index
|
|
@@ -333,14 +352,14 @@ class RetrievalAugmentedConversation(Conversation):
|
|
|
333
352
|
|
|
334
353
|
memory = self.index(query, *args, **kwargs)
|
|
335
354
|
|
|
336
|
-
if
|
|
355
|
+
if "raw_result" in kwargs:
|
|
337
356
|
UserMessage(str(memory), style="text")
|
|
338
357
|
return memory
|
|
339
358
|
|
|
340
359
|
prompt = self.build_tag(self.user_tag, query)
|
|
341
360
|
self.store(prompt)
|
|
342
361
|
|
|
343
|
-
payload = f
|
|
362
|
+
payload = f"[Index Retrieval]:\n{str(memory)[:1500]}\n"
|
|
344
363
|
|
|
345
364
|
res = self.recall(query, *args, payload=payload, **kwargs)
|
|
346
365
|
|
symai/extended/document.py
CHANGED
|
@@ -15,22 +15,24 @@ _DEFAULT_PARAGRAPH_FORMATTER = ParagraphFormatter()
|
|
|
15
15
|
|
|
16
16
|
class DocumentRetriever(Expression):
|
|
17
17
|
def __init__(
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
18
|
+
self,
|
|
19
|
+
source: str | None = None,
|
|
20
|
+
*,
|
|
21
|
+
index_name: str = Indexer.DEFAULT,
|
|
22
|
+
top_k: int = 5,
|
|
23
|
+
max_depth: int = 1,
|
|
24
|
+
formatter: Callable = _DEFAULT_PARAGRAPH_FORMATTER,
|
|
25
|
+
overwrite: bool = False,
|
|
26
|
+
with_metadata: bool = False,
|
|
27
|
+
raw_result: bool | None = False,
|
|
28
|
+
new_dim: int | None = None,
|
|
29
|
+
**kwargs,
|
|
30
|
+
):
|
|
31
31
|
super().__init__(**kwargs)
|
|
32
|
-
self.indexer = Indexer(
|
|
33
|
-
|
|
32
|
+
self.indexer = Indexer(
|
|
33
|
+
index_name=index_name, top_k=top_k, formatter=formatter, auto_add=False, new_dim=new_dim
|
|
34
|
+
)
|
|
35
|
+
self.reader = FileReader(with_metadata=with_metadata)
|
|
34
36
|
self.new_dim = new_dim
|
|
35
37
|
|
|
36
38
|
if overwrite:
|
|
@@ -39,21 +41,23 @@ class DocumentRetriever(Expression):
|
|
|
39
41
|
# we insert the text into the index if (1) index does not exist and (2) there's a specific source
|
|
40
42
|
if source is not None and not self.indexer.exists():
|
|
41
43
|
self.indexer.register()
|
|
42
|
-
text = self.parse_source(
|
|
44
|
+
text = self.parse_source(
|
|
45
|
+
source, with_metadata=with_metadata, max_depth=max_depth, **kwargs
|
|
46
|
+
)
|
|
43
47
|
self.index = self.indexer(data=text, raw_result=raw_result, **kwargs)
|
|
44
48
|
else:
|
|
45
49
|
# we don't insert the text at initialization since the index already exists and there's no specific source
|
|
46
50
|
self.index = self.indexer(raw_result=raw_result, **kwargs)
|
|
47
51
|
|
|
48
52
|
def forward(
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
+
self,
|
|
54
|
+
query: Symbol,
|
|
55
|
+
raw_result: bool | None = False,
|
|
56
|
+
) -> Symbol:
|
|
53
57
|
return self.index(
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
58
|
+
query,
|
|
59
|
+
raw_result=raw_result,
|
|
60
|
+
)
|
|
57
61
|
|
|
58
62
|
def insert(self, source: str | Path, **kwargs):
|
|
59
63
|
# dynamically insert data into the index given a session
|
|
@@ -61,12 +65,19 @@ class DocumentRetriever(Expression):
|
|
|
61
65
|
# - a string (e.g. something that the user wants to insert)
|
|
62
66
|
# - a file path (e.g. a new file that the user wants to insert)
|
|
63
67
|
# - a directory path (e.g. a new directory that the user wants to insert)
|
|
64
|
-
text = self.parse_source(
|
|
65
|
-
|
|
68
|
+
text = self.parse_source(
|
|
69
|
+
source,
|
|
70
|
+
with_metadata=kwargs.get("with_metadata", False),
|
|
71
|
+
max_depth=kwargs.get("max_depth", 1),
|
|
72
|
+
**kwargs,
|
|
73
|
+
)
|
|
74
|
+
# NOTE: Do we need `new_dim` here?
|
|
66
75
|
self.add(text, index_name=self.indexer.index_name, **kwargs)
|
|
67
76
|
self.config(None, save=True, index_name=self.indexer.index_name, **kwargs)
|
|
68
77
|
|
|
69
|
-
def parse_source(
|
|
78
|
+
def parse_source(
|
|
79
|
+
self, source: str, with_metadata: bool, max_depth: int, **kwargs
|
|
80
|
+
) -> list[Union[str, "TextContainer"]]:
|
|
70
81
|
maybe_path = Path(source)
|
|
71
82
|
if isinstance(source, str) and not (maybe_path.is_file() or maybe_path.is_dir()):
|
|
72
83
|
return Symbol(source).zip(new_dim=self.new_dim)
|
|
@@ -75,5 +86,7 @@ class DocumentRetriever(Expression):
|
|
|
75
86
|
return self.reader(files, with_metadata=with_metadata, **kwargs)
|
|
76
87
|
if maybe_path.is_file():
|
|
77
88
|
return self.reader(source, with_metadata=with_metadata, **kwargs)
|
|
78
|
-
UserMessage(
|
|
89
|
+
UserMessage(
|
|
90
|
+
f"Invalid source: {source}; must be a file, directory, or string", raise_with=ValueError
|
|
91
|
+
)
|
|
79
92
|
return []
|
symai/extended/file_merger.py
CHANGED
|
@@ -12,12 +12,48 @@ class FileMerger(Expression):
|
|
|
12
12
|
Class to merge contents of multiple files into one, specified by their file endings and root path.
|
|
13
13
|
Files specified in the exclude list will not be included.
|
|
14
14
|
"""
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
file_endings: list[str] | None = None,
|
|
19
|
+
file_excludes: list[str] | None = None,
|
|
20
|
+
**kwargs,
|
|
21
|
+
):
|
|
17
22
|
if file_excludes is None:
|
|
18
|
-
file_excludes = [
|
|
23
|
+
file_excludes = [
|
|
24
|
+
"__init__.py",
|
|
25
|
+
"__pycache__",
|
|
26
|
+
"LICENSE",
|
|
27
|
+
"requirements.txt",
|
|
28
|
+
"environment.yaml",
|
|
29
|
+
".git",
|
|
30
|
+
]
|
|
19
31
|
if file_endings is None:
|
|
20
|
-
file_endings = [
|
|
32
|
+
file_endings = [
|
|
33
|
+
".py",
|
|
34
|
+
".md",
|
|
35
|
+
".txt",
|
|
36
|
+
".sh",
|
|
37
|
+
".pdf",
|
|
38
|
+
".json",
|
|
39
|
+
".yaml",
|
|
40
|
+
".java",
|
|
41
|
+
".cpp",
|
|
42
|
+
".hpp",
|
|
43
|
+
".c",
|
|
44
|
+
".h",
|
|
45
|
+
".js",
|
|
46
|
+
".css",
|
|
47
|
+
".html",
|
|
48
|
+
".xml",
|
|
49
|
+
".csv",
|
|
50
|
+
".tsv",
|
|
51
|
+
".yml",
|
|
52
|
+
".rst",
|
|
53
|
+
".ipynb",
|
|
54
|
+
".tex",
|
|
55
|
+
".bib",
|
|
56
|
+
]
|
|
21
57
|
super().__init__(**kwargs)
|
|
22
58
|
self.file_endings = file_endings
|
|
23
59
|
self.file_excludes = file_excludes
|
|
@@ -56,9 +92,11 @@ class FileMerger(Expression):
|
|
|
56
92
|
file_path_escaped = file_path_str.replace(" ", "\\ ")
|
|
57
93
|
|
|
58
94
|
# Append start and end markers for each file
|
|
59
|
-
file_content =
|
|
60
|
-
|
|
61
|
-
|
|
95
|
+
file_content = (
|
|
96
|
+
f"# ----[FILE_START]<PART1/1>{file_path_escaped}[FILE_CONTENT]:\n"
|
|
97
|
+
+ file_content
|
|
98
|
+
+ f"\n# ----[FILE_END]{file_path_escaped}\n"
|
|
99
|
+
)
|
|
62
100
|
|
|
63
101
|
# Merge the file contents
|
|
64
102
|
merged_file += file_content
|