symbolicai 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. symai/__init__.py +198 -134
  2. symai/backend/base.py +51 -51
  3. symai/backend/engines/drawing/engine_bfl.py +33 -33
  4. symai/backend/engines/drawing/engine_gpt_image.py +4 -10
  5. symai/backend/engines/embedding/engine_llama_cpp.py +50 -35
  6. symai/backend/engines/embedding/engine_openai.py +22 -16
  7. symai/backend/engines/execute/engine_python.py +16 -16
  8. symai/backend/engines/files/engine_io.py +51 -49
  9. symai/backend/engines/imagecaptioning/engine_blip2.py +27 -23
  10. symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +53 -46
  11. symai/backend/engines/index/engine_pinecone.py +116 -88
  12. symai/backend/engines/index/engine_qdrant.py +1011 -0
  13. symai/backend/engines/index/engine_vectordb.py +78 -52
  14. symai/backend/engines/lean/engine_lean4.py +65 -25
  15. symai/backend/engines/neurosymbolic/__init__.py +28 -28
  16. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +137 -135
  17. symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +145 -152
  18. symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
  19. symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +75 -49
  20. symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +199 -155
  21. symai/backend/engines/neurosymbolic/engine_groq.py +106 -72
  22. symai/backend/engines/neurosymbolic/engine_huggingface.py +100 -67
  23. symai/backend/engines/neurosymbolic/engine_llama_cpp.py +121 -93
  24. symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +213 -132
  25. symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +180 -137
  26. symai/backend/engines/ocr/engine_apilayer.py +18 -20
  27. symai/backend/engines/output/engine_stdout.py +9 -9
  28. symai/backend/engines/{webscraping → scrape}/engine_requests.py +25 -11
  29. symai/backend/engines/search/engine_openai.py +95 -83
  30. symai/backend/engines/search/engine_parallel.py +665 -0
  31. symai/backend/engines/search/engine_perplexity.py +40 -41
  32. symai/backend/engines/search/engine_serpapi.py +33 -28
  33. symai/backend/engines/speech_to_text/engine_local_whisper.py +37 -27
  34. symai/backend/engines/symbolic/engine_wolframalpha.py +14 -8
  35. symai/backend/engines/text_to_speech/engine_openai.py +15 -19
  36. symai/backend/engines/text_vision/engine_clip.py +34 -28
  37. symai/backend/engines/userinput/engine_console.py +3 -4
  38. symai/backend/mixin/anthropic.py +48 -40
  39. symai/backend/mixin/deepseek.py +4 -5
  40. symai/backend/mixin/google.py +5 -4
  41. symai/backend/mixin/groq.py +2 -4
  42. symai/backend/mixin/openai.py +132 -110
  43. symai/backend/settings.py +14 -14
  44. symai/chat.py +164 -94
  45. symai/collect/dynamic.py +13 -11
  46. symai/collect/pipeline.py +39 -31
  47. symai/collect/stats.py +109 -69
  48. symai/components.py +556 -238
  49. symai/constraints.py +14 -5
  50. symai/core.py +1495 -1210
  51. symai/core_ext.py +55 -50
  52. symai/endpoints/api.py +113 -58
  53. symai/extended/api_builder.py +22 -17
  54. symai/extended/arxiv_pdf_parser.py +13 -5
  55. symai/extended/bibtex_parser.py +8 -4
  56. symai/extended/conversation.py +88 -69
  57. symai/extended/document.py +40 -27
  58. symai/extended/file_merger.py +45 -7
  59. symai/extended/graph.py +38 -24
  60. symai/extended/html_style_template.py +17 -11
  61. symai/extended/interfaces/blip_2.py +1 -1
  62. symai/extended/interfaces/clip.py +4 -2
  63. symai/extended/interfaces/console.py +5 -3
  64. symai/extended/interfaces/dall_e.py +3 -1
  65. symai/extended/interfaces/file.py +2 -0
  66. symai/extended/interfaces/flux.py +3 -1
  67. symai/extended/interfaces/gpt_image.py +15 -6
  68. symai/extended/interfaces/input.py +2 -1
  69. symai/extended/interfaces/llava.py +1 -1
  70. symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +3 -2
  71. symai/extended/interfaces/naive_vectordb.py +2 -2
  72. symai/extended/interfaces/ocr.py +4 -2
  73. symai/extended/interfaces/openai_search.py +2 -0
  74. symai/extended/interfaces/parallel.py +30 -0
  75. symai/extended/interfaces/perplexity.py +2 -0
  76. symai/extended/interfaces/pinecone.py +6 -4
  77. symai/extended/interfaces/python.py +2 -0
  78. symai/extended/interfaces/serpapi.py +2 -0
  79. symai/extended/interfaces/terminal.py +0 -1
  80. symai/extended/interfaces/tts.py +2 -1
  81. symai/extended/interfaces/whisper.py +2 -1
  82. symai/extended/interfaces/wolframalpha.py +1 -0
  83. symai/extended/metrics/__init__.py +1 -1
  84. symai/extended/metrics/similarity.py +5 -2
  85. symai/extended/os_command.py +31 -22
  86. symai/extended/packages/symdev.py +39 -34
  87. symai/extended/packages/sympkg.py +30 -27
  88. symai/extended/packages/symrun.py +46 -35
  89. symai/extended/repo_cloner.py +10 -9
  90. symai/extended/seo_query_optimizer.py +15 -12
  91. symai/extended/solver.py +104 -76
  92. symai/extended/summarizer.py +8 -7
  93. symai/extended/taypan_interpreter.py +10 -9
  94. symai/extended/vectordb.py +28 -15
  95. symai/formatter/formatter.py +39 -31
  96. symai/formatter/regex.py +46 -44
  97. symai/functional.py +184 -86
  98. symai/imports.py +85 -51
  99. symai/interfaces.py +1 -1
  100. symai/memory.py +33 -24
  101. symai/menu/screen.py +28 -19
  102. symai/misc/console.py +27 -27
  103. symai/misc/loader.py +4 -3
  104. symai/models/base.py +147 -76
  105. symai/models/errors.py +1 -1
  106. symai/ops/__init__.py +1 -1
  107. symai/ops/measures.py +17 -14
  108. symai/ops/primitives.py +933 -635
  109. symai/post_processors.py +28 -24
  110. symai/pre_processors.py +58 -52
  111. symai/processor.py +15 -9
  112. symai/prompts.py +714 -649
  113. symai/server/huggingface_server.py +115 -32
  114. symai/server/llama_cpp_server.py +14 -6
  115. symai/server/qdrant_server.py +206 -0
  116. symai/shell.py +98 -39
  117. symai/shellsv.py +307 -223
  118. symai/strategy.py +135 -81
  119. symai/symbol.py +276 -225
  120. symai/utils.py +62 -46
  121. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +19 -9
  122. symbolicai-1.1.0.dist-info/RECORD +168 -0
  123. symbolicai-1.0.0.dist-info/RECORD +0 -163
  124. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
  125. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
  126. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
  127. {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
symai/extended/solver.py CHANGED
@@ -57,14 +57,15 @@ $> Max is 2 years older than his brother. In 5 years, Max will be 3 times as old
57
57
  --------------
58
58
  """
59
59
 
60
+
60
61
  class ProblemClassifierPreProcessor(PreProcessor):
61
62
  def __call__(self, argument):
62
- return f'$> {argument.prop.instance!s}\n//'
63
+ return f"$> {argument.prop.instance!s}\n//"
63
64
 
64
65
 
65
66
  class OptionsPreProcessor(PreProcessor):
66
67
  def __call__(self, argument):
67
- return f'$> :{argument.prop.instance!s}: == :{argument.args[0]!s}: =>'
68
+ return f"$> :{argument.prop.instance!s}: == :{argument.args[0]!s}: =>"
68
69
 
69
70
 
70
71
  class ProblemClassifier(Expression):
@@ -75,32 +76,41 @@ class ProblemClassifier(Expression):
75
76
  return PROBLEM_CATEGORY_CONTEXT
76
77
 
77
78
  def __eq__(self, other, **kwargs) -> bool:
78
- @core.few_shot(prompt="Verify equality of the following categories. Ignore typos, upper / lower case or singular / plural differences:\n",
79
- examples=Prompt([
80
- '$> :Arithmetic formula: == :Arithmetics formula: =>True EOF',
81
- '$> :arithmetic formula: == :Arithmetic formula: =>True EOF',
82
- '$> :arithmetic formula: == :arithmeticformula: =>True EOF',
83
- '$> :arithmetic formula: == :Implication and logical expressions: =>False EOF',
84
- '$> :Linear algebra: == :Implication and logical expressions: =>False EOF',
85
- '$> :Linear algebra: == :Unknown category: =>False EOF',
86
- '$> :Linear algebra: == :Linear algebra: =>True EOF',
87
- '$> :Probability and statistics: == :Probabilities and statistics: =>True EOF',
88
- '$> :PROBABILITY AND STATISTICS: == :Probability and statistics: =>True EOF',
89
- '$> :PROBABILITY AND STATISTICS: == :UNKNOWN CATEGORY: =>False EOF',
90
- ]),
91
- pre_processors=[OptionsPreProcessor()],
92
- post_processors=[StripPostProcessor()],
93
- stop=['EOF'], **kwargs)
79
+ @core.few_shot(
80
+ prompt="Verify equality of the following categories. Ignore typos, upper / lower case or singular / plural differences:\n",
81
+ examples=Prompt(
82
+ [
83
+ "$> :Arithmetic formula: == :Arithmetics formula: =>True EOF",
84
+ "$> :arithmetic formula: == :Arithmetic formula: =>True EOF",
85
+ "$> :arithmetic formula: == :arithmeticformula: =>True EOF",
86
+ "$> :arithmetic formula: == :Implication and logical expressions: =>False EOF",
87
+ "$> :Linear algebra: == :Implication and logical expressions: =>False EOF",
88
+ "$> :Linear algebra: == :Unknown category: =>False EOF",
89
+ "$> :Linear algebra: == :Linear algebra: =>True EOF",
90
+ "$> :Probability and statistics: == :Probabilities and statistics: =>True EOF",
91
+ "$> :PROBABILITY AND STATISTICS: == :Probability and statistics: =>True EOF",
92
+ "$> :PROBABILITY AND STATISTICS: == :UNKNOWN CATEGORY: =>False EOF",
93
+ ]
94
+ ),
95
+ pre_processors=[OptionsPreProcessor()],
96
+ post_processors=[StripPostProcessor()],
97
+ stop=["EOF"],
98
+ **kwargs,
99
+ )
94
100
  def _func(_, other) -> bool:
95
101
  pass
102
+
96
103
  return _func(self, other)
97
104
 
98
105
  def forward(self, **kwargs) -> str:
99
- @core.few_shot(prompt="Classify the user query to the mathematical classes:\n",
100
- examples=[],
101
- pre_processors=[ProblemClassifierPreProcessor()],
102
- post_processors=[StripPostProcessor()],
103
- stop=['EOF'], **kwargs)
106
+ @core.few_shot(
107
+ prompt="Classify the user query to the mathematical classes:\n",
108
+ examples=[],
109
+ pre_processors=[ProblemClassifierPreProcessor()],
110
+ post_processors=[StripPostProcessor()],
111
+ stop=["EOF"],
112
+ **kwargs,
113
+ )
104
114
  def _func(_) -> str:
105
115
  pass
106
116
 
@@ -109,34 +119,40 @@ class ProblemClassifier(Expression):
109
119
 
110
120
  class FormulaCheckerPreProcessor(PreProcessor):
111
121
  def __call__(self, argument):
112
- return f'$> {argument.prop.instance!s} =>'
122
+ return f"$> {argument.prop.instance!s} =>"
113
123
 
114
124
 
115
125
  class FormulaChecker(Expression):
116
126
  def forward(self, **kwargs) -> bool:
117
- @core.few_shot(prompt="Is the following statement in an explicit formula form without natural language text?:\n",
118
- examples=Prompt([
119
- '$> 2 + 2 * 2 =>True EOF',
120
- '$> x + 2 = 3 =>True EOF',
121
- '$> Set of all natural numbers =>False EOF',
122
- '$> Probability of drawing a red ball =>False EOF',
123
- '$> (a + b) * (a - b) =>True EOF',
124
- '$> Add the square root of nine to the square root of x =>False EOF',
125
- '$> Five plus two equals seven =>False EOF',
126
- '$> 5 + 2 = 7 =>True EOF',
127
- '$> x is seven =>False EOF',
128
- '$> x = 7 =>True EOF',
129
- '$> Anna has two apples. She gives one to her brother. How many apples does Anna have now? =>False EOF',
130
- '$> 0.447662 =>True EOF',
131
- '$> Subtract the x from y squared =>False EOF',
132
- '$> The sum of the first n natural numbers =>False EOF',
133
- '$> Sum[x=5, {i=0, n=10}] =>True EOF',
134
- ]),
135
- pre_processors=[FormulaCheckerPreProcessor()],
136
- post_processors=[StripPostProcessor()],
137
- stop=['EOF'], **kwargs)
127
+ @core.few_shot(
128
+ prompt="Is the following statement in an explicit formula form without natural language text?:\n",
129
+ examples=Prompt(
130
+ [
131
+ "$> 2 + 2 * 2 =>True EOF",
132
+ "$> x + 2 = 3 =>True EOF",
133
+ "$> Set of all natural numbers =>False EOF",
134
+ "$> Probability of drawing a red ball =>False EOF",
135
+ "$> (a + b) * (a - b) =>True EOF",
136
+ "$> Add the square root of nine to the square root of x =>False EOF",
137
+ "$> Five plus two equals seven =>False EOF",
138
+ "$> 5 + 2 = 7 =>True EOF",
139
+ "$> x is seven =>False EOF",
140
+ "$> x = 7 =>True EOF",
141
+ "$> Anna has two apples. She gives one to her brother. How many apples does Anna have now? =>False EOF",
142
+ "$> 0.447662 =>True EOF",
143
+ "$> Subtract the x from y squared =>False EOF",
144
+ "$> The sum of the first n natural numbers =>False EOF",
145
+ "$> Sum[x=5, {i=0, n=10}] =>True EOF",
146
+ ]
147
+ ),
148
+ pre_processors=[FormulaCheckerPreProcessor()],
149
+ post_processors=[StripPostProcessor()],
150
+ stop=["EOF"],
151
+ **kwargs,
152
+ )
138
153
  def _func(_) -> bool:
139
154
  pass
155
+
140
156
  return _func(self)
141
157
 
142
158
 
@@ -149,27 +165,33 @@ class FormulaChecker(Expression):
149
165
 
150
166
  class FormulaWriterPreProcessor(PreProcessor):
151
167
  def __call__(self, argument):
152
- return f'$> {argument.prop.instance!s} =>'
168
+ return f"$> {argument.prop.instance!s} =>"
153
169
 
154
170
 
155
171
  class FormulaWriter(Expression):
156
172
  def forward(self, **kwargs) -> str:
157
- @core.few_shot(prompt="Rewrite the following natural language statement in a mathematical formula or higher-order logic statement to be solved by Mathematica:\n",
158
- examples=Prompt([
159
- '$> Add 5 plus 3 =>5 + 3 EOF',
160
- '$> Seventy plus twenty =>70 + 20 EOF',
161
- '$> Divide 5 by three =>5 / 3 EOF',
162
- '$> The square root of pi plus x. =>Sqrt[Pi + x] EOF',
163
- '$> Eight point five six seven one four two seven =>8.5671427 EOF',
164
- '$> Give a solution for a quadratic equation x^2 + 2x + 1 =>Solve[x^2 + 2x + 1 ==0, x] EOF',
165
- '$> Sum x n times from i equals 0 to n equals 10. x is equals to 5. =>Sum[x=5, {i=0, n=10}] EOF',
166
- '$> Multiply the first statement in brackets a plus b times the second term in brackets c minus d =>(a + b) * (c - d) EOF'
167
- ]),
168
- pre_processors=[FormulaWriterPreProcessor()],
169
- post_processors=[StripPostProcessor()],
170
- stop=['EOF'], **kwargs)
173
+ @core.few_shot(
174
+ prompt="Rewrite the following natural language statement in a mathematical formula or higher-order logic statement to be solved by Mathematica:\n",
175
+ examples=Prompt(
176
+ [
177
+ "$> Add 5 plus 3 =>5 + 3 EOF",
178
+ "$> Seventy plus twenty =>70 + 20 EOF",
179
+ "$> Divide 5 by three =>5 / 3 EOF",
180
+ "$> The square root of pi plus x. =>Sqrt[Pi + x] EOF",
181
+ "$> Eight point five six seven one four two seven =>8.5671427 EOF",
182
+ "$> Give a solution for a quadratic equation x^2 + 2x + 1 =>Solve[x^2 + 2x + 1 ==0, x] EOF",
183
+ "$> Sum x n times from i equals 0 to n equals 10. x is equals to 5. =>Sum[x=5, {i=0, n=10}] EOF",
184
+ "$> Multiply the first statement in brackets a plus b times the second term in brackets c minus d =>(a + b) * (c - d) EOF",
185
+ ]
186
+ ),
187
+ pre_processors=[FormulaWriterPreProcessor()],
188
+ post_processors=[StripPostProcessor()],
189
+ stop=["EOF"],
190
+ **kwargs,
191
+ )
171
192
  def _func(_) -> str:
172
193
  pass
194
+
173
195
  return _func(self)
174
196
 
175
197
 
@@ -199,17 +221,19 @@ _value_obj_ = problem_statement
199
221
 
200
222
  class SATSolver(Expression):
201
223
  def forward(self, code):
202
- assert z3 is not None, "The z3 library is not installed. Please install it using `pip install 'symbolicai[solver]'` and try again."
224
+ assert z3 is not None, (
225
+ "The z3 library is not installed. Please install it using `pip install 'symbolicai[solver]'` and try again."
226
+ )
203
227
  # Create the execution template
204
- runner = Execute(enclosure=True)
228
+ runner = Execute(enclosure=True)
205
229
  # Execute the code
206
230
  statement = runner(code)
207
231
  # Create a new solver instance
208
- S = z3.Solver()
232
+ S = z3.Solver()
209
233
  # Create a new query
210
- query = statement['locals']['_output_'](S)
234
+ query = statement["locals"]["_output_"](S)
211
235
  # Check if the query can be solved
212
- r = S.check()
236
+ r = S.check()
213
237
  # Print the solution
214
238
  if r == z3.sat:
215
239
  # Get the model
@@ -232,8 +256,8 @@ class Solver(Expression):
232
256
  super().__init__(**kwargs)
233
257
  self.sym_return_type = Solver
234
258
  self.solver = SATSolver()
235
- self.conv = Conversation(init=LOGIC_TEMPLATE)
236
- self.pp = CodeExtractPostProcessor()
259
+ self.conv = Conversation(init=LOGIC_TEMPLATE)
260
+ self.pp = CodeExtractPostProcessor()
237
261
 
238
262
  def rewrite_formula(self, sym, **kwargs):
239
263
  formula = sym
@@ -247,16 +271,20 @@ class Solver(Expression):
247
271
  classifier = ProblemClassifier(sym)
248
272
  problem = classifier(**kwargs)
249
273
 
250
- if problem == 'Arithmetics formula' or problem == 'Equations':
274
+ if problem == "Arithmetics formula" or problem == "Equations":
251
275
  formula = self.rewrite_formula(sym, **kwargs)
252
276
  UserMessage(str(formula))
253
- elif problem == 'Implication and logical expressions':
254
- res = self.conv(sym, **kwargs)
255
- code = self.pp(str(res), None, tag="python")
256
- formula = self.solver(code, lambda: 'German')
277
+ elif problem == "Implication and logical expressions":
278
+ res = self.conv(sym, **kwargs)
279
+ code = self.pp(str(res), None, tag="python")
280
+ formula = self.solver(code, lambda: "German")
257
281
  UserMessage(str(formula))
258
- elif problem == 'Probability and statistics' or problem == 'Linear algebra' or problem == 'Linguistic problem with relations':
259
- UserMessage('This feature is not yet implemented.', raise_with=NotImplementedError)
282
+ elif (
283
+ problem == "Probability and statistics"
284
+ or problem == "Linear algebra"
285
+ or problem == "Linguistic problem with relations"
286
+ ):
287
+ UserMessage("This feature is not yet implemented.", raise_with=NotImplementedError)
260
288
  else:
261
289
  return "Sorry, something went wrong. Please check if your backend is available and try again or report an issue to the devs. :("
262
290
  return None
@@ -271,8 +299,8 @@ def process_query(args) -> None:
271
299
 
272
300
  def run() -> None:
273
301
  # All the logic of argparse goes in this function
274
- parser = argparse.ArgumentParser(description='Welcome to the Symbolic<AI/> Shell support tool!')
275
- parser.add_argument('query', type=str, help='The prompt for the shell query.')
302
+ parser = argparse.ArgumentParser(description="Welcome to the Symbolic<AI/> Shell support tool!")
303
+ parser.add_argument("query", type=str, help="The prompt for the shell query.")
276
304
 
277
305
  args = parser.parse_args()
278
306
  process_query(args)
@@ -1,4 +1,3 @@
1
-
2
1
  from ..components import Clean, Outline, Sequence, Stream, Translate
3
2
  from ..symbol import Expression, Symbol
4
3
 
@@ -9,12 +8,14 @@ class Summarizer(Expression):
9
8
  filters = []
10
9
  super().__init__(**kwargs)
11
10
  filters = filters if isinstance(filters, (list, tuple)) else [filters]
12
- self.data_stream = Stream(Sequence(
13
- Clean(),
14
- Translate(),
15
- Outline(),
16
- *filters,
17
- ))
11
+ self.data_stream = Stream(
12
+ Sequence(
13
+ Clean(),
14
+ Translate(),
15
+ Outline(),
16
+ *filters,
17
+ )
18
+ )
18
19
 
19
20
  def forward(self, sym: Symbol, **kwargs) -> Symbol:
20
21
  vals = list(self.data_stream(sym, **kwargs))
@@ -9,15 +9,12 @@ from ..symbol import Expression, Symbol
9
9
  def create_template():
10
10
  package_path = pathlib.Path(__file__).parent.absolute()
11
11
 
12
-
13
- with (package_path / 'symbol.py').open() as f:
12
+ with (package_path / "symbol.py").open() as f:
14
13
  SYMBOL_API = f.read()
15
14
 
16
-
17
- with (package_path / 'components.py').open() as f:
15
+ with (package_path / "components.py").open() as f:
18
16
  COMPONENTS_API = f.read()
19
17
 
20
-
21
18
  return f"""[Description]
22
19
  You are a programming language re-writing system from Taypan (high-level general-purpose programming language based on neuro-symbolic virtual machine) to Python interpreter, analogous to the relation between Scala and Java is the relation of Taypan to Python.
23
20
 
@@ -113,7 +110,7 @@ def create_template():
113
110
 
114
111
  class TaypanPreProcessor(PreProcessor):
115
112
  def __call__(self, argument):
116
- return f'```taypan\n{argument.args[0]!s}\n =>'
113
+ return f"```taypan\n{argument.args[0]!s}\n =>"
117
114
 
118
115
 
119
116
  class TaypanInterpreter(Expression):
@@ -127,9 +124,13 @@ class TaypanInterpreter(Expression):
127
124
  self.description = create_template()
128
125
 
129
126
  def forward(self, sym: Symbol, **kwargs) -> Symbol:
130
- @zero_shot(prompt="Translate the Taypan code to Python code:\n",
131
- pre_processors=[TaypanPreProcessor()],
132
- post_processors=[CodeExtractPostProcessor()], **kwargs)
127
+ @zero_shot(
128
+ prompt="Translate the Taypan code to Python code:\n",
129
+ pre_processors=[TaypanPreProcessor()],
130
+ post_processors=[CodeExtractPostProcessor()],
131
+ **kwargs,
132
+ )
133
133
  def _func(_, text) -> str:
134
134
  pass
135
+
135
136
  return _func(self, sym)
@@ -21,8 +21,8 @@ from .metrics import (
21
21
  ranking_algorithm_sort,
22
22
  )
23
23
 
24
- logging.getLogger('sentence_transformers').setLevel(logging.WARNING)
25
- logging.getLogger('datasets').setLevel(logging.WARNING)
24
+ logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
25
+ logging.getLogger("datasets").setLevel(logging.WARNING)
26
26
 
27
27
 
28
28
  class VectorDB(Expression):
@@ -35,6 +35,7 @@ class VectorDB(Expression):
35
35
  _default_top_k: ClassVar[int] = 5
36
36
  _default_storage_path: ClassVar[Path] = HOME_PATH / "localdb"
37
37
  _default_index_name: ClassVar[str] = "dataindex"
38
+
38
39
  def __init__(
39
40
  self,
40
41
  documents=_default_documents,
@@ -46,7 +47,7 @@ class VectorDB(Expression):
46
47
  index_dims=_default_index_dims,
47
48
  top_k=_default_top_k,
48
49
  index_name=_default_index_name,
49
- **kwargs
50
+ **kwargs,
50
51
  ):
51
52
  super().__init__(**kwargs)
52
53
  self.config = deepcopy(SYMAI_CONFIG)
@@ -77,7 +78,10 @@ class VectorDB(Expression):
77
78
  elif "adams" in similarity_metric:
78
79
  self.similarity_metric = adams_similarity
79
80
  else:
80
- UserMessage("Similarity metric not supported. Please use either 'dot', 'cosine', 'euclidean', 'adams', or 'derrida'.", raise_with=ValueError)
81
+ UserMessage(
82
+ "Similarity metric not supported. Please use either 'dot', 'cosine', 'euclidean', 'adams', or 'derrida'.",
83
+ raise_with=ValueError,
84
+ )
81
85
 
82
86
  if load_on_init:
83
87
  if isinstance(load_on_init, (str, Path)):
@@ -87,8 +91,11 @@ class VectorDB(Expression):
87
91
  self.load()
88
92
 
89
93
  def _init_embedding_model(self):
90
- if self.config['EMBEDDING_ENGINE_API_KEY'] is None or self.config['EMBEDDING_ENGINE_API_KEY'] == '':
91
- self.model = Interface('ExtensityAI/embeddings') # default to local model
94
+ if (
95
+ self.config["EMBEDDING_ENGINE_API_KEY"] is None
96
+ or self.config["EMBEDDING_ENGINE_API_KEY"] == ""
97
+ ):
98
+ self.model = Interface("ExtensityAI/embeddings") # default to local model
92
99
  else:
93
100
  self.model = lambda x: Symbol(x).embedding
94
101
 
@@ -158,7 +165,10 @@ class VectorDB(Expression):
158
165
  if len(documents) == 0:
159
166
  return []
160
167
  texts = self._to_texts(documents, key)
161
- batches = [texts[index : index + self.batch_size] for index in range(0, len(texts), self.batch_size)]
168
+ batches = [
169
+ texts[index : index + self.batch_size]
170
+ for index in range(0, len(texts), self.batch_size)
171
+ ]
162
172
  embeddings = []
163
173
  for batch in batches:
164
174
  embeddings.extend(self._embed_batch(batch))
@@ -186,8 +196,7 @@ class VectorDB(Expression):
186
196
  )
187
197
  ]
188
198
  return [
189
- {"document": document, "index": index}
190
- for index, document in enumerate(self.documents)
199
+ {"document": document, "index": index} for index, document in enumerate(self.documents)
191
200
  ]
192
201
 
193
202
  def add(self, documents, vectors=None):
@@ -222,7 +231,7 @@ class VectorDB(Expression):
222
231
  A vector to add to the database.
223
232
 
224
233
  """
225
- vector = (vector if vector is not None else self.embedding_function([document])[0])
234
+ vector = vector if vector is not None else self.embedding_function([document])[0]
226
235
  if self.vectors is None:
227
236
  self.vectors = np.empty((0, len(vector)), dtype=np.float32)
228
237
  elif len(vector) != self.vectors.shape[1]:
@@ -269,7 +278,7 @@ class VectorDB(Expression):
269
278
  Clears the database.
270
279
 
271
280
  """
272
- self.vectors = None
281
+ self.vectors = None
273
282
  self.documents = []
274
283
 
275
284
  def save(self, storage_file: str | None = None):
@@ -296,7 +305,7 @@ class VectorDB(Expression):
296
305
  with storage_file.open("wb") as f:
297
306
  pickle.dump(data, f)
298
307
 
299
- def load(self, storage_file : str | None = None):
308
+ def load(self, storage_file: str | None = None):
300
309
  """
301
310
  Loads the database from a file.
302
311
 
@@ -326,7 +335,7 @@ class VectorDB(Expression):
326
335
  self.vectors = data["vectors"].astype(np.float32) if data["vectors"] is not None else None
327
336
  self.documents = data["documents"]
328
337
 
329
- def purge(self, index_name : str):
338
+ def purge(self, index_name: str):
330
339
  """
331
340
  Purges the database file from your machine, but does not delete the database from memory.
332
341
  Use the `clear` method to clear the database from memory.
@@ -371,7 +380,9 @@ class VectorDB(Expression):
371
380
  A list of results.
372
381
 
373
382
  """
374
- assert self.vectors is not None, "Error: Cannot query the database without prior insertion / initialization."
383
+ assert self.vectors is not None, (
384
+ "Error: Cannot query the database without prior insertion / initialization."
385
+ )
375
386
  top_k = top_k or self.index_top_k
376
387
  query_vector = self.embedding_function([query])[0] if vector is None else vector
377
388
  if isinstance(query_vector, list):
@@ -380,5 +391,7 @@ class VectorDB(Expression):
380
391
  self.vectors, query_vector, top_k=top_k, metric=self.similarity_metric
381
392
  )
382
393
  if return_similarities:
383
- return list(zip([self.documents[index] for index in ranked_results], similarities, strict=False))
394
+ return list(
395
+ zip([self.documents[index] for index in ranked_results], similarities, strict=False)
396
+ )
384
397
  return [self.documents[index] for index in ranked_results]
@@ -21,16 +21,16 @@ class ParagraphFormatter(Expression):
21
21
 
22
22
  def split_files(self, input_text=""):
23
23
  input_ = input_text.strip()
24
- if input_.startswith('# ----[FILE_START]') and '# ----[FILE_END]' in input_:
24
+ if input_.startswith("# ----[FILE_START]") and "# ----[FILE_END]" in input_:
25
25
  self._has_file_start = True
26
26
  # split text file-wise and create a map of file names and their contents
27
27
  files = {}
28
- split_text = input_.split('# ----[FILE_START]')
28
+ split_text = input_.split("# ----[FILE_START]")
29
29
  for _i, file in enumerate(split_text):
30
30
  if not file.strip():
31
31
  continue
32
- _, content_file = file.split('[FILE_CONTENT]:')
33
- content, file_name = content_file.split('# ----[FILE_END]')
32
+ _, content_file = file.split("[FILE_CONTENT]:")
33
+ content, file_name = content_file.split("# ----[FILE_END]")
34
34
  files[file_name.strip()] = content.strip()
35
35
  else:
36
36
  files = {"": input_}
@@ -40,8 +40,10 @@ class ParagraphFormatter(Expression):
40
40
  if file_name and self._has_file_start:
41
41
  header = f"# ----[FILE_START]<PART{part}/{total_parts}>{file_name}[FILE_CONTENT]:\n"
42
42
  footer = f"\n# ----[FILE_END]{file_name}\n"
43
- if '[FILE_CONTENT]:' in paragraph: # TODO: remove this if statement after fixing the bug
44
- paragraph = paragraph.split('[FILE_CONTENT]:')[-1].strip()
43
+ if (
44
+ "[FILE_CONTENT]:" in paragraph
45
+ ): # TODO: remove this if statement after fixing the bug
46
+ paragraph = paragraph.split("[FILE_CONTENT]:")[-1].strip()
45
47
  paragraph = header + paragraph + footer
46
48
  return paragraph
47
49
 
@@ -67,7 +69,12 @@ class ParagraphFormatter(Expression):
67
69
  input_ = file_content.strip()
68
70
  split_text = self.NEWLINES_RE.split(input_)
69
71
 
70
- par = [self._add_header_footer(p, file_name, part=i+1, total_parts=len(split_text)) + "\n" for i, p in enumerate(split_text) if p.strip()]
72
+ par = [
73
+ self._add_header_footer(p, file_name, part=i + 1, total_parts=len(split_text))
74
+ + "\n"
75
+ for i, p in enumerate(split_text)
76
+ if p.strip()
77
+ ]
71
78
  # p + "\n" ensures that all lines in the paragraph end with a newline
72
79
  # p.strip() == True if paragraph has other characters than whitespace
73
80
 
@@ -85,14 +92,20 @@ class ParagraphFormatter(Expression):
85
92
  # n splits
86
93
  total_parts = (len(words) // max_length + 1) * self._get_total_parts(text)
87
94
  for p, i in enumerate(range(0, len(words), max_length)):
88
- paragraph = ' '.join(words[i:i + max_length])
89
- paragraphs.append(self._add_header_footer(paragraph, file_name, part=p+1, total_parts=total_parts) + "\n")
95
+ paragraph = " ".join(words[i : i + max_length])
96
+ paragraphs.append(
97
+ self._add_header_footer(
98
+ paragraph, file_name, part=p + 1, total_parts=total_parts
99
+ )
100
+ + "\n"
101
+ )
90
102
  else:
91
103
  paragraphs.append(text)
92
104
  return paragraphs
93
105
 
94
- @core_ext.bind(engine='embedding', property='max_tokens')
95
- def _max_tokens(self): pass
106
+ @core_ext.bind(engine="embedding", property="max_tokens")
107
+ def _max_tokens(self):
108
+ pass
96
109
 
97
110
  def split_max_tokens_exceeded(self, input_text: List[str], token_ratio=0.5):
98
111
  paragraphs = []
@@ -107,8 +120,13 @@ class ParagraphFormatter(Expression):
107
120
  text_len_ = len(str(text)) // splits_
108
121
  total_parts = (text_len_ + 1) * self._get_total_parts(text)
109
122
  for i in range(splits_):
110
- paragraph = text[i * text_len_:(i + 1) * text_len_]
111
- paragraphs.append(self._add_header_footer(paragraph, file_name, part=i+1, total_parts=total_parts) + "\n")
123
+ paragraph = text[i * text_len_ : (i + 1) * text_len_]
124
+ paragraphs.append(
125
+ self._add_header_footer(
126
+ paragraph, file_name, part=i + 1, total_parts=total_parts
127
+ )
128
+ + "\n"
129
+ )
112
130
  else:
113
131
  paragraphs.append(text)
114
132
  return paragraphs
@@ -126,7 +144,9 @@ class ParagraphFormatter(Expression):
126
144
  class SentenceFormatter(Expression):
127
145
  def __init__(self, value=None, **kwargs):
128
146
  super().__init__(value, **kwargs)
129
- self.SENTENCES_RE = re.compile(r"[.!?]\n*|[\n]{1,}") # Sentence ending characters followed by newlines
147
+ self.SENTENCES_RE = re.compile(
148
+ r"[.!?]\n*|[\n]{1,}"
149
+ ) # Sentence ending characters followed by newlines
130
150
 
131
151
  def split_sentences(self, input_text=""):
132
152
  input_ = input_text.strip()
@@ -161,13 +181,7 @@ class RegexFormatter(Expression):
161
181
 
162
182
 
163
183
  class TextContainerFormatter(Expression):
164
- def __init__(
165
- self,
166
- value: Any = None,
167
- key: str ="text",
168
- text_split: int = 4,
169
- **kwargs
170
- ):
184
+ def __init__(self, value: Any = None, key: str = "text", text_split: int = 4, **kwargs):
171
185
  super().__init__(value, **kwargs)
172
186
  self.key = key
173
187
  self.text_split = text_split
@@ -179,7 +193,7 @@ class TextContainerFormatter(Expression):
179
193
  chunks = [text for container in tqdm(containers) for text in self._chunk(container)]
180
194
  return self._to_symbol(chunks)
181
195
 
182
- def _chunk(self, container: 'TextContainer') -> List[str]:
196
+ def _chunk(self, container: "TextContainer") -> List[str]:
183
197
  text = container.text
184
198
  step = len(text) // self.text_split
185
199
  splits = []
@@ -189,16 +203,10 @@ class TextContainerFormatter(Expression):
189
203
  # Unify the last chunk with the previous one if necessary
190
204
  splits.append(self._as_str(text[i:], container))
191
205
  break
192
- splits.append(self._as_str(text[i:i+step], container))
206
+ splits.append(self._as_str(text[i : i + step], container))
193
207
  i += step
194
208
  c += 1
195
209
  return splits
196
210
 
197
- def _as_str(self, text: str, container: 'TextContainer') -> str:
198
- return (
199
- '---\n'
200
- f"id: {container.id}\n"
201
- f"page: {container.page}\n"
202
- '---\n'
203
- f"{text}"
204
- )
211
+ def _as_str(self, text: str, container: "TextContainer") -> str:
212
+ return f"---\nid: {container.id}\npage: {container.page}\n---\n{text}"