symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +96 -64
- symai/backend/base.py +93 -80
- symai/backend/engines/drawing/engine_bfl.py +12 -11
- symai/backend/engines/drawing/engine_gpt_image.py +108 -87
- symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
- symai/backend/engines/embedding/engine_openai.py +3 -5
- symai/backend/engines/execute/engine_python.py +6 -5
- symai/backend/engines/files/engine_io.py +74 -67
- symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
- symai/backend/engines/index/engine_pinecone.py +23 -24
- symai/backend/engines/index/engine_vectordb.py +16 -14
- symai/backend/engines/lean/engine_lean4.py +38 -34
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
- symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
- symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
- symai/backend/engines/ocr/engine_apilayer.py +6 -8
- symai/backend/engines/output/engine_stdout.py +1 -4
- symai/backend/engines/search/engine_openai.py +7 -7
- symai/backend/engines/search/engine_perplexity.py +5 -5
- symai/backend/engines/search/engine_serpapi.py +12 -14
- symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
- symai/backend/engines/text_to_speech/engine_openai.py +5 -7
- symai/backend/engines/text_vision/engine_clip.py +7 -11
- symai/backend/engines/userinput/engine_console.py +3 -3
- symai/backend/engines/webscraping/engine_requests.py +81 -48
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +4 -2
- symai/backend/mixin/deepseek.py +2 -0
- symai/backend/mixin/google.py +2 -0
- symai/backend/mixin/openai.py +11 -3
- symai/backend/settings.py +83 -16
- symai/chat.py +101 -78
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +77 -69
- symai/collect/pipeline.py +35 -27
- symai/collect/stats.py +75 -63
- symai/components.py +198 -169
- symai/constraints.py +15 -12
- symai/core.py +698 -359
- symai/core_ext.py +32 -34
- symai/endpoints/api.py +80 -73
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +11 -8
- symai/extended/arxiv_pdf_parser.py +13 -12
- symai/extended/bibtex_parser.py +2 -3
- symai/extended/conversation.py +101 -90
- symai/extended/document.py +17 -10
- symai/extended/file_merger.py +18 -13
- symai/extended/graph.py +18 -13
- symai/extended/html_style_template.py +2 -4
- symai/extended/interfaces/blip_2.py +1 -2
- symai/extended/interfaces/clip.py +1 -2
- symai/extended/interfaces/console.py +7 -1
- symai/extended/interfaces/dall_e.py +1 -1
- symai/extended/interfaces/flux.py +1 -1
- symai/extended/interfaces/gpt_image.py +1 -1
- symai/extended/interfaces/input.py +1 -1
- symai/extended/interfaces/llava.py +0 -1
- symai/extended/interfaces/naive_vectordb.py +7 -8
- symai/extended/interfaces/naive_webscraping.py +1 -1
- symai/extended/interfaces/ocr.py +1 -1
- symai/extended/interfaces/pinecone.py +6 -5
- symai/extended/interfaces/serpapi.py +1 -1
- symai/extended/interfaces/terminal.py +2 -3
- symai/extended/interfaces/tts.py +1 -1
- symai/extended/interfaces/whisper.py +1 -1
- symai/extended/interfaces/wolframalpha.py +1 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +11 -13
- symai/extended/os_command.py +17 -16
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +19 -16
- symai/extended/packages/sympkg.py +12 -9
- symai/extended/packages/symrun.py +21 -19
- symai/extended/repo_cloner.py +11 -10
- symai/extended/seo_query_optimizer.py +1 -2
- symai/extended/solver.py +20 -23
- symai/extended/summarizer.py +4 -3
- symai/extended/taypan_interpreter.py +10 -12
- symai/extended/vectordb.py +99 -82
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +12 -16
- symai/formatter/regex.py +62 -63
- symai/functional.py +176 -122
- symai/imports.py +136 -127
- symai/interfaces.py +56 -27
- symai/memory.py +14 -13
- symai/misc/console.py +49 -39
- symai/misc/loader.py +5 -3
- symai/models/__init__.py +17 -1
- symai/models/base.py +269 -181
- symai/models/errors.py +0 -1
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +11 -15
- symai/ops/primitives.py +348 -228
- symai/post_processors.py +32 -28
- symai/pre_processors.py +39 -41
- symai/processor.py +6 -4
- symai/prompts.py +59 -45
- symai/server/huggingface_server.py +23 -20
- symai/server/llama_cpp_server.py +7 -5
- symai/shell.py +3 -4
- symai/shellsv.py +499 -375
- symai/strategy.py +517 -287
- symai/symbol.py +111 -116
- symai/utils.py +42 -36
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
- symbolicai-1.0.0.dist-info/RECORD +163 -0
- symbolicai-0.20.2.dist-info/RECORD +0 -162
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -9,7 +9,7 @@ from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
|
|
|
9
9
|
from openai import OpenAI
|
|
10
10
|
|
|
11
11
|
from ....symbol import Result
|
|
12
|
-
from ....utils import
|
|
12
|
+
from ....utils import UserMessage
|
|
13
13
|
from ...base import Engine
|
|
14
14
|
from ...mixin import OPENAI_CHAT_MODELS, OPENAI_REASONING_MODELS
|
|
15
15
|
from ...settings import SYMAI_CONFIG
|
|
@@ -40,7 +40,7 @@ class SearchResult(Result):
|
|
|
40
40
|
def __init__(self, value, **kwargs) -> None:
|
|
41
41
|
super().__init__(value, **kwargs)
|
|
42
42
|
if value.get('error'):
|
|
43
|
-
|
|
43
|
+
UserMessage(value['error'], raise_with=ValueError)
|
|
44
44
|
try:
|
|
45
45
|
text, annotations = self._extract_text_and_annotations(value)
|
|
46
46
|
if text is None:
|
|
@@ -56,7 +56,7 @@ class SearchResult(Result):
|
|
|
56
56
|
|
|
57
57
|
except Exception as e:
|
|
58
58
|
self._value = None
|
|
59
|
-
|
|
59
|
+
UserMessage(f"Failed to parse response: {e}", raise_with=ValueError)
|
|
60
60
|
|
|
61
61
|
def _extract_text(self, value) -> str | None:
|
|
62
62
|
if isinstance(value.get('output_text'), str) and value.get('output_text'):
|
|
@@ -65,7 +65,7 @@ class SearchResult(Result):
|
|
|
65
65
|
for output in value.get('output', []):
|
|
66
66
|
if output.get('type') == 'message' and output.get('content'):
|
|
67
67
|
content0 = output['content'][0]
|
|
68
|
-
if
|
|
68
|
+
if content0.get('text'):
|
|
69
69
|
text = content0['text']
|
|
70
70
|
return text
|
|
71
71
|
|
|
@@ -223,7 +223,7 @@ class SearchResult(Result):
|
|
|
223
223
|
try:
|
|
224
224
|
return f"<pre>{json.dumps(self.raw, indent=2)}</pre>"
|
|
225
225
|
except Exception:
|
|
226
|
-
return f"<pre>{
|
|
226
|
+
return f"<pre>{self.raw!s}</pre>"
|
|
227
227
|
|
|
228
228
|
def get_citations(self) -> list[Citation]:
|
|
229
229
|
return self._citations
|
|
@@ -244,7 +244,7 @@ class GPTXSearchEngine(Engine):
|
|
|
244
244
|
try:
|
|
245
245
|
self.client = OpenAI(api_key=self.api_key)
|
|
246
246
|
except Exception as e:
|
|
247
|
-
|
|
247
|
+
UserMessage(f"Failed to initialize OpenAI client: {e}", raise_with=ValueError)
|
|
248
248
|
|
|
249
249
|
def id(self) -> str:
|
|
250
250
|
if self.config.get('SEARCH_ENGINE_API_KEY') and \
|
|
@@ -350,7 +350,7 @@ class GPTXSearchEngine(Engine):
|
|
|
350
350
|
res = self.client.responses.create(**payload)
|
|
351
351
|
res = SearchResult(res.dict())
|
|
352
352
|
except Exception as e:
|
|
353
|
-
|
|
353
|
+
UserMessage(f"Failed to make request: {e}", raise_with=ValueError)
|
|
354
354
|
|
|
355
355
|
metadata = {"raw_output": res.raw}
|
|
356
356
|
output = [res]
|
|
@@ -4,7 +4,7 @@ import logging
|
|
|
4
4
|
import requests
|
|
5
5
|
|
|
6
6
|
from ....symbol import Result
|
|
7
|
-
from ....utils import
|
|
7
|
+
from ....utils import UserMessage
|
|
8
8
|
from ...base import Engine
|
|
9
9
|
from ...settings import SYMAI_CONFIG
|
|
10
10
|
|
|
@@ -18,12 +18,12 @@ class SearchResult(Result):
|
|
|
18
18
|
def __init__(self, value, **kwargs) -> None:
|
|
19
19
|
super().__init__(value, **kwargs)
|
|
20
20
|
if value.get('error'):
|
|
21
|
-
|
|
21
|
+
UserMessage(value['error'], raise_with=ValueError)
|
|
22
22
|
try:
|
|
23
23
|
self._value = value['choices'][0]['message']['content']
|
|
24
24
|
except Exception as e:
|
|
25
25
|
self._value = None
|
|
26
|
-
|
|
26
|
+
UserMessage(f"Failed to parse response: {e}", raise_with=ValueError)
|
|
27
27
|
|
|
28
28
|
def __str__(self) -> str:
|
|
29
29
|
try:
|
|
@@ -35,7 +35,7 @@ class SearchResult(Result):
|
|
|
35
35
|
try:
|
|
36
36
|
return f"<pre>{json.dumps(self.raw, indent=2)}</pre>"
|
|
37
37
|
except TypeError:
|
|
38
|
-
return f"<pre>{
|
|
38
|
+
return f"<pre>{self.raw!s}</pre>"
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class PerplexityEngine(Engine):
|
|
@@ -91,7 +91,7 @@ class PerplexityEngine(Engine):
|
|
|
91
91
|
res = requests.post("https://api.perplexity.ai/chat/completions", json=payload, headers=headers)
|
|
92
92
|
res = SearchResult(res.json())
|
|
93
93
|
except Exception as e:
|
|
94
|
-
|
|
94
|
+
UserMessage(f"Failed to make request: {e}", raise_with=ValueError)
|
|
95
95
|
|
|
96
96
|
metadata = {"raw_output": res.raw}
|
|
97
97
|
output = [res]
|
|
@@ -3,31 +3,31 @@ import json
|
|
|
3
3
|
from IPython.utils import io
|
|
4
4
|
|
|
5
5
|
from ....symbol import Result
|
|
6
|
-
from ....utils import
|
|
6
|
+
from ....utils import UserMessage
|
|
7
7
|
from ...base import Engine
|
|
8
8
|
from ...settings import SYMAI_CONFIG
|
|
9
9
|
|
|
10
10
|
try:
|
|
11
11
|
from serpapi import GoogleSearch
|
|
12
|
-
except:
|
|
12
|
+
except ImportError:
|
|
13
13
|
GoogleSearch = None
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class SearchResult(Result):
|
|
17
17
|
def __init__(self, value, **kwargs) -> None:
|
|
18
18
|
super().__init__(value, **kwargs)
|
|
19
|
-
if 'answer_box' in value
|
|
19
|
+
if 'answer_box' in value and 'answer' in value['answer_box']:
|
|
20
20
|
self._value = value['answer_box']['answer']
|
|
21
|
-
elif 'answer_box' in value
|
|
21
|
+
elif 'answer_box' in value and 'snippet' in value['answer_box']:
|
|
22
22
|
self._value = value['answer_box']['snippet']
|
|
23
|
-
elif 'answer_box' in value
|
|
23
|
+
elif 'answer_box' in value and 'snippet_highlighted_words' in value['answer_box']:
|
|
24
24
|
self._value = value['answer_box']["snippet_highlighted_words"][0]
|
|
25
|
-
elif 'organic_results' in value and 'snippet' in value["organic_results"][0]
|
|
25
|
+
elif 'organic_results' in value and 'snippet' in value["organic_results"][0]:
|
|
26
26
|
self._value = value["organic_results"][0]['snippet']
|
|
27
27
|
else:
|
|
28
28
|
self._value = value
|
|
29
29
|
|
|
30
|
-
if 'organic_results' in value
|
|
30
|
+
if 'organic_results' in value:
|
|
31
31
|
self.results = value['organic_results']
|
|
32
32
|
if len(self.results) > 0:
|
|
33
33
|
self.links = [r['link'] for r in self.results]
|
|
@@ -38,12 +38,10 @@ class SearchResult(Result):
|
|
|
38
38
|
self.links = []
|
|
39
39
|
|
|
40
40
|
def __str__(self) -> str:
|
|
41
|
-
|
|
42
|
-
return json_str
|
|
41
|
+
return json.dumps(self.raw, indent=2)
|
|
43
42
|
|
|
44
43
|
def _repr_html_(self) -> str:
|
|
45
|
-
|
|
46
|
-
return json_str
|
|
44
|
+
return json.dumps(self.raw, indent=2)
|
|
47
45
|
|
|
48
46
|
|
|
49
47
|
class SerpApiEngine(Engine):
|
|
@@ -57,7 +55,7 @@ class SerpApiEngine(Engine):
|
|
|
57
55
|
def id(self) -> str:
|
|
58
56
|
if self.config.get('SEARCH_ENGINE_API_KEY') and self.config.get('SEARCH_ENGINE_MODEL') == "google": # only support Google for now
|
|
59
57
|
if GoogleSearch is None:
|
|
60
|
-
|
|
58
|
+
UserMessage('SerpApi is not installed. Please install it with `pip install symbolicai[serpapi]`')
|
|
61
59
|
return 'search'
|
|
62
60
|
return super().id() # default to unregistered
|
|
63
61
|
|
|
@@ -73,7 +71,7 @@ class SerpApiEngine(Engine):
|
|
|
73
71
|
kwargs = argument.kwargs
|
|
74
72
|
queries_ = queries if isinstance(queries, list) else [queries]
|
|
75
73
|
rsp = []
|
|
76
|
-
engine = kwargs
|
|
74
|
+
engine = kwargs.get('engine', self.engine)
|
|
77
75
|
|
|
78
76
|
for q in queries_:
|
|
79
77
|
query = {
|
|
@@ -86,7 +84,7 @@ class SerpApiEngine(Engine):
|
|
|
86
84
|
}
|
|
87
85
|
|
|
88
86
|
# send to Google
|
|
89
|
-
with io.capture_output()
|
|
87
|
+
with io.capture_output(): # disables prints from GoogleSearch
|
|
90
88
|
search = GoogleSearch(query)
|
|
91
89
|
res = search.get_dict()
|
|
92
90
|
|
|
@@ -1,19 +1,18 @@
|
|
|
1
|
-
import
|
|
1
|
+
import contextlib
|
|
2
2
|
import re
|
|
3
|
+
from collections.abc import Iterable
|
|
3
4
|
from itertools import takewhile
|
|
4
|
-
from typing import Iterable
|
|
5
5
|
|
|
6
6
|
import torch
|
|
7
7
|
|
|
8
8
|
from ....symbol import Expression, Result
|
|
9
|
-
from ....utils import
|
|
9
|
+
from ....utils import UserMessage
|
|
10
10
|
from ...base import Engine
|
|
11
11
|
from ...settings import SYMAI_CONFIG
|
|
12
12
|
|
|
13
13
|
try:
|
|
14
14
|
import whisper
|
|
15
|
-
from whisper.audio import
|
|
16
|
-
N_SAMPLES # @NOTE: sample_rate (16_000) * chunk_length (30) = 480_000
|
|
15
|
+
from whisper.audio import N_SAMPLES # @NOTE: sample_rate (16_000) * chunk_length (30) = 480_000
|
|
17
16
|
from whisper.tokenizer import get_tokenizer
|
|
18
17
|
except ImportError:
|
|
19
18
|
whisper = None
|
|
@@ -27,10 +26,10 @@ class WhisperTimestampsFormatter(Expression):
|
|
|
27
26
|
def forward(self, response: list[str]) -> str:
|
|
28
27
|
result = []
|
|
29
28
|
for i, interval in enumerate(response):
|
|
30
|
-
|
|
29
|
+
interval_tokens = self._filter_empty_string(interval)
|
|
31
30
|
prev_end = 0.0
|
|
32
31
|
prev_start = 0.0
|
|
33
|
-
for head, tail in zip(
|
|
32
|
+
for head, tail in zip(interval_tokens[::2], interval_tokens[1::2], strict=False):
|
|
34
33
|
start = self._get_timestamp(head)
|
|
35
34
|
end = self._get_timestamp(tail)
|
|
36
35
|
if start >= prev_end:
|
|
@@ -46,10 +45,7 @@ class WhisperTimestampsFormatter(Expression):
|
|
|
46
45
|
start = prev_end
|
|
47
46
|
else:
|
|
48
47
|
start += prev_end
|
|
49
|
-
if start + delta > 30
|
|
50
|
-
end = 30
|
|
51
|
-
else:
|
|
52
|
-
end = start + delta
|
|
48
|
+
end = 30 if start + delta > 30 else start + delta
|
|
53
49
|
prev_end = end
|
|
54
50
|
result.append(f"{self._format_to_hours(start + (i*30))} {self._get_sentence(head)}")
|
|
55
51
|
return "\n".join(result)
|
|
@@ -68,8 +64,7 @@ class WhisperTimestampsFormatter(Expression):
|
|
|
68
64
|
seconds %= 3600
|
|
69
65
|
minutes = int(seconds // 60)
|
|
70
66
|
seconds %= 60
|
|
71
|
-
|
|
72
|
-
return formatted_time
|
|
67
|
+
return f"{hours:02d}:{minutes:02d}:{int(seconds):02d}"
|
|
73
68
|
|
|
74
69
|
|
|
75
70
|
class WhisperResult(Result):
|
|
@@ -80,16 +75,16 @@ class WhisperResult(Result):
|
|
|
80
75
|
|
|
81
76
|
def get_bins(self, bin_size_s: int = 5 * 60) -> list[str]:
|
|
82
77
|
tmps = list(map(self._seconds, re.findall(r"\b\d{2}:\d{2}:\d{2}\b", self._value)))
|
|
83
|
-
value_pairs = list(zip(tmps, self._value.split("\n")))
|
|
84
|
-
|
|
78
|
+
value_pairs = list(zip(tmps, self._value.split("\n"), strict=False))
|
|
79
|
+
bin_segments = []
|
|
85
80
|
result = []
|
|
86
81
|
for tmp, seg in value_pairs:
|
|
87
|
-
|
|
82
|
+
bin_segments.append(seg)
|
|
88
83
|
if tmp == 0 or (tmp - bin_size_s) % bin_size_s != 0:
|
|
89
84
|
continue
|
|
90
|
-
result.append("\n".join(
|
|
91
|
-
|
|
92
|
-
result.append("\n".join(
|
|
85
|
+
result.append("\n".join(bin_segments))
|
|
86
|
+
bin_segments = []
|
|
87
|
+
result.append("\n".join(bin_segments))
|
|
93
88
|
return result
|
|
94
89
|
|
|
95
90
|
def _seconds(self, tmp: str) -> int:
|
|
@@ -115,7 +110,7 @@ class WhisperEngine(Engine):
|
|
|
115
110
|
try:
|
|
116
111
|
self.model = whisper.load_model(self.model_id, device=device)
|
|
117
112
|
except RuntimeError:
|
|
118
|
-
|
|
113
|
+
UserMessage(f"Whisper failed to load model on device {device}. Fallback to {device_fallback}.")
|
|
119
114
|
self.model = whisper.load_model(self.model_id, device=device_fallback)
|
|
120
115
|
self.old_model_id = self.model_id
|
|
121
116
|
|
|
@@ -124,7 +119,7 @@ class WhisperEngine(Engine):
|
|
|
124
119
|
def id(self) -> str:
|
|
125
120
|
if self.config['SPEECH_TO_TEXT_ENGINE_MODEL']:
|
|
126
121
|
if whisper is None:
|
|
127
|
-
|
|
122
|
+
UserMessage("Whisper is not installed. Please install it with `pip install symbolicai[whisper]`", raise_with=ImportError)
|
|
128
123
|
return 'speech-to-text'
|
|
129
124
|
return super().id() # default to unregistered
|
|
130
125
|
|
|
@@ -153,8 +148,8 @@ class WhisperEngine(Engine):
|
|
|
153
148
|
rsp = max(probs, key=probs.get)
|
|
154
149
|
elif prompt == 'decode':
|
|
155
150
|
if show_pbar:
|
|
156
|
-
#
|
|
157
|
-
from tqdm import tqdm
|
|
151
|
+
# Suppress tqdm warning; keep optional dependency lazy.
|
|
152
|
+
from tqdm import tqdm # noqa
|
|
158
153
|
pbar = tqdm(self._get_chunks(audio))
|
|
159
154
|
else:
|
|
160
155
|
pbar = self._get_chunks(audio)
|
|
@@ -180,7 +175,7 @@ class WhisperEngine(Engine):
|
|
|
180
175
|
else:
|
|
181
176
|
rsp = " ".join(self.text)
|
|
182
177
|
else:
|
|
183
|
-
|
|
178
|
+
UserMessage(f"Unknown whisper command prompt: {prompt}", raise_with=ValueError)
|
|
184
179
|
|
|
185
180
|
metadata = {}
|
|
186
181
|
rsp = WhisperResult(rsp)
|
|
@@ -204,7 +199,5 @@ class WhisperEngine(Engine):
|
|
|
204
199
|
yield torch.tensor(it[i:min(i + batch, size)]).to(self.model.device)
|
|
205
200
|
|
|
206
201
|
def _try_compile(self):
|
|
207
|
-
|
|
202
|
+
with contextlib.suppress(Exception):
|
|
208
203
|
self.model = torch.compile(self.model)
|
|
209
|
-
except Exception:
|
|
210
|
-
pass
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
from copy import deepcopy
|
|
2
2
|
|
|
3
3
|
from ....symbol import Result
|
|
4
|
-
from ....utils import
|
|
4
|
+
from ....utils import UserMessage
|
|
5
5
|
from ...base import Engine
|
|
6
6
|
from ...settings import SYMAI_CONFIG
|
|
7
7
|
|
|
8
8
|
try:
|
|
9
9
|
import wolframalpha as wa
|
|
10
10
|
except ImportError:
|
|
11
|
-
|
|
11
|
+
UserMessage("WolframAlpha is not installed. Please install it with `pip install symbolicai[wolframalpha]`", raise_with=ImportError)
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class WolframResult(Result):
|
|
@@ -45,7 +45,7 @@ class WolframAlphaEngine(Engine):
|
|
|
45
45
|
rsp = self.client.query(queries)
|
|
46
46
|
rsp = WolframResult(rsp)
|
|
47
47
|
except Exception as e:
|
|
48
|
-
|
|
48
|
+
UserMessage(f'Failed to interact with WolframAlpha: {e}.\n\n If you are getting an error related to "assert", that is a well-known issue with WolframAlpha. There is a manual fix for this issue: https://github.com/jaraco/wolframalpha/pull/34/commits/6eb3828ee812f65592e00629710fc027d40e7bd1', raise_with=ValueError)
|
|
49
49
|
|
|
50
50
|
metadata = {}
|
|
51
51
|
|
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
3
|
from openai import OpenAI
|
|
4
|
-
from typing import Optional
|
|
5
|
-
|
|
6
|
-
# suppress openai logging
|
|
7
|
-
logging.getLogger("openai").setLevel(logging.WARNING)
|
|
8
4
|
|
|
5
|
+
from ....symbol import Result
|
|
9
6
|
from ...base import Engine
|
|
10
7
|
from ...settings import SYMAI_CONFIG
|
|
11
|
-
|
|
8
|
+
|
|
9
|
+
# suppress openai logging
|
|
10
|
+
logging.getLogger("openai").setLevel(logging.WARNING)
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
class TTSEngine(Engine):
|
|
15
|
-
def __init__(self, api_key:
|
|
14
|
+
def __init__(self, api_key: str | None = None, model: str | None = None):
|
|
16
15
|
super().__init__()
|
|
17
16
|
self.config = SYMAI_CONFIG
|
|
18
17
|
self.api_key = self.config['TEXT_TO_SPEECH_ENGINE_API_KEY'] if api_key is None else api_key
|
|
@@ -35,7 +34,6 @@ class TTSEngine(Engine):
|
|
|
35
34
|
self.model_id = kwargs['TEXT_TO_SPEECH_ENGINE_MODEL']
|
|
36
35
|
|
|
37
36
|
def forward(self, argument):
|
|
38
|
-
kwargs = argument.kwargs
|
|
39
37
|
voice, path, prompt = argument.prop.prepared_input
|
|
40
38
|
|
|
41
39
|
rsp = self.client.audio.speech.create(
|
|
@@ -1,22 +1,21 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
import
|
|
2
|
+
from io import BytesIO
|
|
3
3
|
|
|
4
|
+
import requests
|
|
4
5
|
import torch
|
|
5
|
-
from typing import Optional
|
|
6
6
|
from PIL import Image
|
|
7
|
-
from io import BytesIO
|
|
8
7
|
from transformers import CLIPModel, CLIPProcessor
|
|
9
8
|
|
|
9
|
+
from ....utils import UserMessage
|
|
10
10
|
from ...base import Engine
|
|
11
11
|
from ...settings import SYMAI_CONFIG
|
|
12
12
|
|
|
13
|
-
|
|
14
13
|
# supress warnings
|
|
15
14
|
logging.getLogger("PIL").setLevel(logging.WARNING)
|
|
16
15
|
|
|
17
16
|
|
|
18
17
|
class CLIPEngine(Engine):
|
|
19
|
-
def __init__(self, model:
|
|
18
|
+
def __init__(self, model: str | None = None):
|
|
20
19
|
super().__init__()
|
|
21
20
|
self.model = None # lazy loading
|
|
22
21
|
self.preprocessor = None # lazy loading
|
|
@@ -44,11 +43,8 @@ class CLIPEngine(Engine):
|
|
|
44
43
|
if isinstance(img, bytes):
|
|
45
44
|
images.append(Image.open(BytesIO(img)))
|
|
46
45
|
elif isinstance(img, str):
|
|
47
|
-
if img.startswith('http')
|
|
48
|
-
|
|
49
|
-
else:
|
|
50
|
-
image_ = img
|
|
51
|
-
image = Image.open(image_)
|
|
46
|
+
image_source = requests.get(img, stream=True).raw if img.startswith('http') else img
|
|
47
|
+
image = Image.open(image_source)
|
|
52
48
|
images.append(image)
|
|
53
49
|
return images
|
|
54
50
|
|
|
@@ -75,7 +71,7 @@ class CLIPEngine(Engine):
|
|
|
75
71
|
logits_per_image = outputs.logits_per_image # this is the image-text similarity score
|
|
76
72
|
rsp = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
|
|
77
73
|
else:
|
|
78
|
-
|
|
74
|
+
UserMessage("CLIPEngine requires either image or text input.", raise_with=NotImplementedError)
|
|
79
75
|
|
|
80
76
|
rsp = rsp.squeeze().detach().cpu().numpy()
|
|
81
77
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from typing import List
|
|
2
1
|
|
|
2
|
+
from ....utils import UserMessage
|
|
3
3
|
from ...base import Engine
|
|
4
4
|
|
|
5
5
|
|
|
@@ -15,9 +15,9 @@ class UserInputEngine(Engine):
|
|
|
15
15
|
msg = argument.prop.prepared_input
|
|
16
16
|
kwargs = argument.kwargs
|
|
17
17
|
|
|
18
|
-
mock = kwargs
|
|
18
|
+
mock = kwargs.get('mock', False)
|
|
19
19
|
if mock: # mock user input
|
|
20
|
-
|
|
20
|
+
UserMessage(msg)
|
|
21
21
|
rsp = mock
|
|
22
22
|
else:
|
|
23
23
|
rsp = input(msg)
|
|
@@ -10,6 +10,7 @@ service disruption.
|
|
|
10
10
|
import io
|
|
11
11
|
import logging
|
|
12
12
|
import re
|
|
13
|
+
from typing import Any, ClassVar
|
|
13
14
|
from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
|
|
14
15
|
|
|
15
16
|
import requests
|
|
@@ -19,6 +20,7 @@ from pdfminer.high_level import extract_text
|
|
|
19
20
|
from requests.structures import CaseInsensitiveDict
|
|
20
21
|
|
|
21
22
|
from ....symbol import Result
|
|
23
|
+
from ....utils import UserMessage
|
|
22
24
|
from ...base import Engine
|
|
23
25
|
|
|
24
26
|
logging.getLogger("pdfminer").setLevel(logging.WARNING)
|
|
@@ -56,14 +58,14 @@ class RequestsEngine(Engine):
|
|
|
56
58
|
the requests session stay aligned.
|
|
57
59
|
"""
|
|
58
60
|
|
|
59
|
-
COMMON_BYPASS_COOKIES = {
|
|
61
|
+
COMMON_BYPASS_COOKIES: ClassVar[dict[str, str]] = {
|
|
60
62
|
# Some forums display consent or age gates once if a friendly cookie is set.
|
|
61
63
|
"cookieconsent_status": "allow",
|
|
62
64
|
"accepted_cookies": "yes",
|
|
63
65
|
"age_verified": "1",
|
|
64
66
|
}
|
|
65
67
|
|
|
66
|
-
DEFAULT_HEADERS = {
|
|
68
|
+
DEFAULT_HEADERS: ClassVar[dict[str, str]] = {
|
|
67
69
|
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
|
68
70
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
69
71
|
"Chrome/120.0.0.0 Safari/537.36",
|
|
@@ -72,7 +74,7 @@ class RequestsEngine(Engine):
|
|
|
72
74
|
"DNT": "1",
|
|
73
75
|
}
|
|
74
76
|
|
|
75
|
-
_SAMESITE_CANONICAL = {
|
|
77
|
+
_SAMESITE_CANONICAL: ClassVar[dict[str, str]] = {
|
|
76
78
|
"strict": "Strict",
|
|
77
79
|
"lax": "Lax",
|
|
78
80
|
"none": "None",
|
|
@@ -156,6 +158,56 @@ class RequestsEngine(Engine):
|
|
|
156
158
|
payload["sameSite"] = same_site
|
|
157
159
|
return payload
|
|
158
160
|
|
|
161
|
+
def _collect_playwright_cookies(self, hostname: str) -> list[dict[str, Any]]:
|
|
162
|
+
if not hostname:
|
|
163
|
+
return []
|
|
164
|
+
cookie_payload = []
|
|
165
|
+
for cookie in self.session.cookies:
|
|
166
|
+
payload = self._playwright_cookie_payload(cookie, hostname)
|
|
167
|
+
if payload:
|
|
168
|
+
cookie_payload.append(payload)
|
|
169
|
+
return cookie_payload
|
|
170
|
+
|
|
171
|
+
@staticmethod
|
|
172
|
+
def _add_cookies_to_context(context, cookie_payload: list[dict[str, Any]]) -> None:
|
|
173
|
+
if cookie_payload:
|
|
174
|
+
context.add_cookies(cookie_payload)
|
|
175
|
+
|
|
176
|
+
@staticmethod
|
|
177
|
+
def _navigate_playwright_page(page, url: str, wait_selector: str | None, wait_until: str, timeout_ms: int, timeout_error):
|
|
178
|
+
try:
|
|
179
|
+
response = page.goto(url, wait_until=wait_until, timeout=timeout_ms)
|
|
180
|
+
if wait_selector:
|
|
181
|
+
page.wait_for_selector(wait_selector, timeout=timeout_ms)
|
|
182
|
+
return response, None
|
|
183
|
+
except timeout_error as exc:
|
|
184
|
+
return None, exc
|
|
185
|
+
|
|
186
|
+
@staticmethod
|
|
187
|
+
def _safe_page_content(page) -> str:
|
|
188
|
+
try:
|
|
189
|
+
return page.content()
|
|
190
|
+
except Exception:
|
|
191
|
+
return ""
|
|
192
|
+
|
|
193
|
+
def _sync_cookies_from_context(self, context) -> None:
|
|
194
|
+
for cookie in context.cookies():
|
|
195
|
+
self.session.cookies.set(
|
|
196
|
+
cookie["name"],
|
|
197
|
+
cookie["value"],
|
|
198
|
+
domain=cookie.get("domain"),
|
|
199
|
+
path=cookie.get("path", "/"),
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
@staticmethod
|
|
203
|
+
def _rendered_response_metadata(page, response):
|
|
204
|
+
final_url = page.url
|
|
205
|
+
status = response.status if response is not None else 200
|
|
206
|
+
headers = CaseInsensitiveDict(response.headers if response is not None else {})
|
|
207
|
+
if "content-type" not in headers:
|
|
208
|
+
headers["Content-Type"] = "text/html; charset=utf-8"
|
|
209
|
+
return final_url, status, headers
|
|
210
|
+
|
|
159
211
|
def _follow_meta_refresh(self, resp, timeout=15):
|
|
160
212
|
"""
|
|
161
213
|
Some old forums use <meta http-equiv="refresh" content="0;url=...">
|
|
@@ -180,19 +232,21 @@ class RequestsEngine(Engine):
|
|
|
180
232
|
return resp
|
|
181
233
|
return self.session.get(target, timeout=timeout, allow_redirects=True)
|
|
182
234
|
|
|
183
|
-
def _fetch_with_playwright(self, url: str, wait_selector: str = None, wait_until: str = "networkidle", timeout: float = None):
|
|
235
|
+
def _fetch_with_playwright(self, url: str, wait_selector: str | None = None, wait_until: str = "networkidle", timeout: float | None = None):
|
|
184
236
|
"""
|
|
185
237
|
Render the target URL in a headless browser to execute JavaScript and
|
|
186
238
|
return a synthetic ``requests.Response`` object to keep downstream
|
|
187
239
|
processing consistent with the non-JS path.
|
|
188
240
|
"""
|
|
189
241
|
try:
|
|
190
|
-
|
|
242
|
+
# Playwright is optional; import only when JS rendering is requested.
|
|
243
|
+
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError # noqa
|
|
244
|
+
from playwright.sync_api import sync_playwright # noqa
|
|
191
245
|
logging.getLogger("playwright").setLevel(logging.WARNING)
|
|
192
246
|
except ImportError as exc:
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
) from exc
|
|
247
|
+
msg = "Playwright is not installed. Install symbolicai[webscraping] with Playwright extras to enable render_js."
|
|
248
|
+
UserMessage(msg)
|
|
249
|
+
raise RuntimeError(msg) from exc
|
|
196
250
|
|
|
197
251
|
timeout_seconds = timeout if timeout is not None else self.timeout
|
|
198
252
|
timeout_ms = max(int(timeout_seconds * 1000), 0)
|
|
@@ -200,12 +254,7 @@ class RequestsEngine(Engine):
|
|
|
200
254
|
|
|
201
255
|
parsed = urlparse(url)
|
|
202
256
|
hostname = parsed.hostname or ""
|
|
203
|
-
cookie_payload =
|
|
204
|
-
if hostname:
|
|
205
|
-
for cookie in self.session.cookies:
|
|
206
|
-
payload = self._playwright_cookie_payload(cookie, hostname)
|
|
207
|
-
if payload:
|
|
208
|
-
cookie_payload.append(payload)
|
|
257
|
+
cookie_payload = self._collect_playwright_cookies(hostname)
|
|
209
258
|
|
|
210
259
|
content = ""
|
|
211
260
|
final_url = url
|
|
@@ -219,42 +268,26 @@ class RequestsEngine(Engine):
|
|
|
219
268
|
java_script_enabled=True,
|
|
220
269
|
ignore_https_errors=not self.verify_ssl,
|
|
221
270
|
)
|
|
222
|
-
if cookie_payload:
|
|
223
|
-
context.add_cookies(cookie_payload)
|
|
224
|
-
page = context.new_page()
|
|
225
|
-
|
|
226
|
-
navigation_error = None
|
|
227
|
-
response = None
|
|
228
271
|
try:
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
cookie["name"],
|
|
245
|
-
cookie["value"],
|
|
246
|
-
domain=cookie.get("domain"),
|
|
247
|
-
path=cookie.get("path", "/"),
|
|
248
|
-
)
|
|
249
|
-
|
|
250
|
-
final_url = page.url
|
|
251
|
-
status = response.status if response is not None else 200
|
|
252
|
-
headers = CaseInsensitiveDict(response.headers if response is not None else {})
|
|
253
|
-
if "content-type" not in headers:
|
|
254
|
-
headers["Content-Type"] = "text/html; charset=utf-8"
|
|
255
|
-
|
|
272
|
+
self._add_cookies_to_context(context, cookie_payload)
|
|
273
|
+
page = context.new_page()
|
|
274
|
+
|
|
275
|
+
response, navigation_error = self._navigate_playwright_page(
|
|
276
|
+
page,
|
|
277
|
+
url,
|
|
278
|
+
wait_selector,
|
|
279
|
+
wait_until,
|
|
280
|
+
timeout_ms,
|
|
281
|
+
PlaywrightTimeoutError,
|
|
282
|
+
)
|
|
283
|
+
content = self._safe_page_content(page)
|
|
284
|
+
self._sync_cookies_from_context(context)
|
|
285
|
+
|
|
286
|
+
final_url, status, headers = self._rendered_response_metadata(page, response)
|
|
256
287
|
if navigation_error and not content:
|
|
257
|
-
|
|
288
|
+
msg = f"Playwright timed out while rendering {url}"
|
|
289
|
+
UserMessage(msg)
|
|
290
|
+
raise requests.exceptions.Timeout(msg) from navigation_error
|
|
258
291
|
finally:
|
|
259
292
|
context.close()
|
|
260
293
|
browser.close()
|
symai/backend/mixin/__init__.py
CHANGED
|
@@ -8,3 +8,16 @@ from .groq import SUPPORTED_CHAT_MODELS as GROQ_CHAT_MODELS
|
|
|
8
8
|
from .groq import SUPPORTED_REASONING_MODELS as GROQ_REASONING_MODELS
|
|
9
9
|
from .openai import SUPPORTED_CHAT_MODELS as OPENAI_CHAT_MODELS
|
|
10
10
|
from .openai import SUPPORTED_REASONING_MODELS as OPENAI_REASONING_MODELS
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"ANTHROPIC_CHAT_MODELS",
|
|
14
|
+
"ANTHROPIC_REASONING_MODELS",
|
|
15
|
+
"DEEPSEEK_CHAT_MODELS",
|
|
16
|
+
"DEEPSEEK_REASONING_MODELS",
|
|
17
|
+
"GOOGLE_CHAT_MODELS",
|
|
18
|
+
"GOOGLE_REASONING_MODELS",
|
|
19
|
+
"GROQ_CHAT_MODELS",
|
|
20
|
+
"GROQ_REASONING_MODELS",
|
|
21
|
+
"OPENAI_CHAT_MODELS",
|
|
22
|
+
"OPENAI_REASONING_MODELS",
|
|
23
|
+
]
|