symbolicai 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- symai/__init__.py +198 -134
- symai/backend/base.py +51 -51
- symai/backend/engines/drawing/engine_bfl.py +33 -33
- symai/backend/engines/drawing/engine_gpt_image.py +4 -10
- symai/backend/engines/embedding/engine_llama_cpp.py +50 -35
- symai/backend/engines/embedding/engine_openai.py +22 -16
- symai/backend/engines/execute/engine_python.py +16 -16
- symai/backend/engines/files/engine_io.py +51 -49
- symai/backend/engines/imagecaptioning/engine_blip2.py +27 -23
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +53 -46
- symai/backend/engines/index/engine_pinecone.py +116 -88
- symai/backend/engines/index/engine_qdrant.py +1011 -0
- symai/backend/engines/index/engine_vectordb.py +78 -52
- symai/backend/engines/lean/engine_lean4.py +65 -25
- symai/backend/engines/neurosymbolic/__init__.py +28 -28
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +137 -135
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +145 -152
- symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +75 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +199 -155
- symai/backend/engines/neurosymbolic/engine_groq.py +106 -72
- symai/backend/engines/neurosymbolic/engine_huggingface.py +100 -67
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +121 -93
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +213 -132
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +180 -137
- symai/backend/engines/ocr/engine_apilayer.py +18 -20
- symai/backend/engines/output/engine_stdout.py +9 -9
- symai/backend/engines/{webscraping → scrape}/engine_requests.py +25 -11
- symai/backend/engines/search/engine_openai.py +95 -83
- symai/backend/engines/search/engine_parallel.py +665 -0
- symai/backend/engines/search/engine_perplexity.py +40 -41
- symai/backend/engines/search/engine_serpapi.py +33 -28
- symai/backend/engines/speech_to_text/engine_local_whisper.py +37 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +14 -8
- symai/backend/engines/text_to_speech/engine_openai.py +15 -19
- symai/backend/engines/text_vision/engine_clip.py +34 -28
- symai/backend/engines/userinput/engine_console.py +3 -4
- symai/backend/mixin/anthropic.py +48 -40
- symai/backend/mixin/deepseek.py +4 -5
- symai/backend/mixin/google.py +5 -4
- symai/backend/mixin/groq.py +2 -4
- symai/backend/mixin/openai.py +132 -110
- symai/backend/settings.py +14 -14
- symai/chat.py +164 -94
- symai/collect/dynamic.py +13 -11
- symai/collect/pipeline.py +39 -31
- symai/collect/stats.py +109 -69
- symai/components.py +556 -238
- symai/constraints.py +14 -5
- symai/core.py +1495 -1210
- symai/core_ext.py +55 -50
- symai/endpoints/api.py +113 -58
- symai/extended/api_builder.py +22 -17
- symai/extended/arxiv_pdf_parser.py +13 -5
- symai/extended/bibtex_parser.py +8 -4
- symai/extended/conversation.py +88 -69
- symai/extended/document.py +40 -27
- symai/extended/file_merger.py +45 -7
- symai/extended/graph.py +38 -24
- symai/extended/html_style_template.py +17 -11
- symai/extended/interfaces/blip_2.py +1 -1
- symai/extended/interfaces/clip.py +4 -2
- symai/extended/interfaces/console.py +5 -3
- symai/extended/interfaces/dall_e.py +3 -1
- symai/extended/interfaces/file.py +2 -0
- symai/extended/interfaces/flux.py +3 -1
- symai/extended/interfaces/gpt_image.py +15 -6
- symai/extended/interfaces/input.py +2 -1
- symai/extended/interfaces/llava.py +1 -1
- symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +3 -2
- symai/extended/interfaces/naive_vectordb.py +2 -2
- symai/extended/interfaces/ocr.py +4 -2
- symai/extended/interfaces/openai_search.py +2 -0
- symai/extended/interfaces/parallel.py +30 -0
- symai/extended/interfaces/perplexity.py +2 -0
- symai/extended/interfaces/pinecone.py +6 -4
- symai/extended/interfaces/python.py +2 -0
- symai/extended/interfaces/serpapi.py +2 -0
- symai/extended/interfaces/terminal.py +0 -1
- symai/extended/interfaces/tts.py +2 -1
- symai/extended/interfaces/whisper.py +2 -1
- symai/extended/interfaces/wolframalpha.py +1 -0
- symai/extended/metrics/__init__.py +1 -1
- symai/extended/metrics/similarity.py +5 -2
- symai/extended/os_command.py +31 -22
- symai/extended/packages/symdev.py +39 -34
- symai/extended/packages/sympkg.py +30 -27
- symai/extended/packages/symrun.py +46 -35
- symai/extended/repo_cloner.py +10 -9
- symai/extended/seo_query_optimizer.py +15 -12
- symai/extended/solver.py +104 -76
- symai/extended/summarizer.py +8 -7
- symai/extended/taypan_interpreter.py +10 -9
- symai/extended/vectordb.py +28 -15
- symai/formatter/formatter.py +39 -31
- symai/formatter/regex.py +46 -44
- symai/functional.py +184 -86
- symai/imports.py +85 -51
- symai/interfaces.py +1 -1
- symai/memory.py +33 -24
- symai/menu/screen.py +28 -19
- symai/misc/console.py +27 -27
- symai/misc/loader.py +4 -3
- symai/models/base.py +147 -76
- symai/models/errors.py +1 -1
- symai/ops/__init__.py +1 -1
- symai/ops/measures.py +17 -14
- symai/ops/primitives.py +933 -635
- symai/post_processors.py +28 -24
- symai/pre_processors.py +58 -52
- symai/processor.py +15 -9
- symai/prompts.py +714 -649
- symai/server/huggingface_server.py +115 -32
- symai/server/llama_cpp_server.py +14 -6
- symai/server/qdrant_server.py +206 -0
- symai/shell.py +98 -39
- symai/shellsv.py +307 -223
- symai/strategy.py +135 -81
- symai/symbol.py +276 -225
- symai/utils.py +62 -46
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +19 -9
- symbolicai-1.1.0.dist-info/RECORD +168 -0
- symbolicai-1.0.0.dist-info/RECORD +0 -163
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0
symai/extended/solver.py
CHANGED
@@ -57,14 +57,15 @@ $> Max is 2 years older than his brother. In 5 years, Max will be 3 times as old
 --------------
 """

+
 class ProblemClassifierPreProcessor(PreProcessor):
     def __call__(self, argument):
-        return f
+        return f"$> {argument.prop.instance!s}\n//"


 class OptionsPreProcessor(PreProcessor):
     def __call__(self, argument):
-        return f
+        return f"$> :{argument.prop.instance!s}: == :{argument.args[0]!s}: =>"


 class ProblemClassifier(Expression):
@@ -75,32 +76,41 @@ class ProblemClassifier(Expression):
         return PROBLEM_CATEGORY_CONTEXT

     def __eq__(self, other, **kwargs) -> bool:
-        @core.few_shot(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        @core.few_shot(
+            prompt="Verify equality of the following categories. Ignore typos, upper / lower case or singular / plural differences:\n",
+            examples=Prompt(
+                [
+                    "$> :Arithmetic formula: == :Arithmetics formula: =>True EOF",
+                    "$> :arithmetic formula: == :Arithmetic formula: =>True EOF",
+                    "$> :arithmetic formula: == :arithmeticformula: =>True EOF",
+                    "$> :arithmetic formula: == :Implication and logical expressions: =>False EOF",
+                    "$> :Linear algebra: == :Implication and logical expressions: =>False EOF",
+                    "$> :Linear algebra: == :Unknown category: =>False EOF",
+                    "$> :Linear algebra: == :Linear algebra: =>True EOF",
+                    "$> :Probability and statistics: == :Probabilities and statistics: =>True EOF",
+                    "$> :PROBABILITY AND STATISTICS: == :Probability and statistics: =>True EOF",
+                    "$> :PROBABILITY AND STATISTICS: == :UNKNOWN CATEGORY: =>False EOF",
+                ]
+            ),
+            pre_processors=[OptionsPreProcessor()],
+            post_processors=[StripPostProcessor()],
+            stop=["EOF"],
+            **kwargs,
+        )
         def _func(_, other) -> bool:
             pass
+
         return _func(self, other)

     def forward(self, **kwargs) -> str:
-        @core.few_shot(
-
-
-
-
+        @core.few_shot(
+            prompt="Classify the user query to the mathematical classes:\n",
+            examples=[],
+            pre_processors=[ProblemClassifierPreProcessor()],
+            post_processors=[StripPostProcessor()],
+            stop=["EOF"],
+            **kwargs,
+        )
         def _func(_) -> str:
             pass

@@ -109,34 +119,40 @@ class ProblemClassifier(Expression):

 class FormulaCheckerPreProcessor(PreProcessor):
     def __call__(self, argument):
-        return f
+        return f"$> {argument.prop.instance!s} =>"


 class FormulaChecker(Expression):
     def forward(self, **kwargs) -> bool:
-        @core.few_shot(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        @core.few_shot(
+            prompt="Is the following statement in an explicit formula form without natural language text?:\n",
+            examples=Prompt(
+                [
+                    "$> 2 + 2 * 2 =>True EOF",
+                    "$> x + 2 = 3 =>True EOF",
+                    "$> Set of all natural numbers =>False EOF",
+                    "$> Probability of drawing a red ball =>False EOF",
+                    "$> (a + b) * (a - b) =>True EOF",
+                    "$> Add the square root of nine to the square root of x =>False EOF",
+                    "$> Five plus two equals seven =>False EOF",
+                    "$> 5 + 2 = 7 =>True EOF",
+                    "$> x is seven =>False EOF",
+                    "$> x = 7 =>True EOF",
+                    "$> Anna has two apples. She gives one to her brother. How many apples does Anna have now? =>False EOF",
+                    "$> 0.447662 =>True EOF",
+                    "$> Subtract the x from y squared =>False EOF",
+                    "$> The sum of the first n natural numbers =>False EOF",
+                    "$> Sum[x=5, {i=0, n=10}] =>True EOF",
+                ]
+            ),
+            pre_processors=[FormulaCheckerPreProcessor()],
+            post_processors=[StripPostProcessor()],
+            stop=["EOF"],
+            **kwargs,
+        )
         def _func(_) -> bool:
             pass
+
         return _func(self)


@@ -149,27 +165,33 @@ class FormulaChecker(Expression):

 class FormulaWriterPreProcessor(PreProcessor):
     def __call__(self, argument):
-        return f
+        return f"$> {argument.prop.instance!s} =>"


 class FormulaWriter(Expression):
     def forward(self, **kwargs) -> str:
-        @core.few_shot(
-
-
-
-
-
-
-
-
-
-
-
-
-
+        @core.few_shot(
+            prompt="Rewrite the following natural language statement in a mathematical formula or higher-order logic statement to be solved by Mathematica:\n",
+            examples=Prompt(
+                [
+                    "$> Add 5 plus 3 =>5 + 3 EOF",
+                    "$> Seventy plus twenty =>70 + 20 EOF",
+                    "$> Divide 5 by three =>5 / 3 EOF",
+                    "$> The square root of pi plus x. =>Sqrt[Pi + x] EOF",
+                    "$> Eight point five six seven one four two seven =>8.5671427 EOF",
+                    "$> Give a solution for a quadratic equation x^2 + 2x + 1 =>Solve[x^2 + 2x + 1 ==0, x] EOF",
+                    "$> Sum x n times from i equals 0 to n equals 10. x is equals to 5. =>Sum[x=5, {i=0, n=10}] EOF",
+                    "$> Multiply the first statement in brackets a plus b times the second term in brackets c minus d =>(a + b) * (c - d) EOF",
+                ]
+            ),
+            pre_processors=[FormulaWriterPreProcessor()],
+            post_processors=[StripPostProcessor()],
+            stop=["EOF"],
+            **kwargs,
+        )
         def _func(_) -> str:
             pass
+
         return _func(self)


@@ -199,17 +221,19 @@ _value_obj_ = problem_statement

 class SATSolver(Expression):
     def forward(self, code):
-        assert z3 is not None,
+        assert z3 is not None, (
+            "The z3 library is not installed. Please install it using `pip install 'symbolicai[solver]'` and try again."
+        )
         # Create the execution template
-        runner
+        runner = Execute(enclosure=True)
         # Execute the code
         statement = runner(code)
         # Create a new solver instance
-        S
+        S = z3.Solver()
         # Create a new query
-        query
+        query = statement["locals"]["_output_"](S)
         # Check if the query can be solved
-        r
+        r = S.check()
         # Print the solution
         if r == z3.sat:
             # Get the model
@@ -232,8 +256,8 @@ class Solver(Expression):
         super().__init__(**kwargs)
         self.sym_return_type = Solver
         self.solver = SATSolver()
-        self.conv
-        self.pp
+        self.conv = Conversation(init=LOGIC_TEMPLATE)
+        self.pp = CodeExtractPostProcessor()

     def rewrite_formula(self, sym, **kwargs):
         formula = sym
@@ -247,16 +271,20 @@ class Solver(Expression):
         classifier = ProblemClassifier(sym)
         problem = classifier(**kwargs)

-        if problem ==
+        if problem == "Arithmetics formula" or problem == "Equations":
             formula = self.rewrite_formula(sym, **kwargs)
             UserMessage(str(formula))
-        elif problem ==
-            res
-            code
-            formula = self.solver(code, lambda:
+        elif problem == "Implication and logical expressions":
+            res = self.conv(sym, **kwargs)
+            code = self.pp(str(res), None, tag="python")
+            formula = self.solver(code, lambda: "German")
             UserMessage(str(formula))
-        elif
-
+        elif (
+            problem == "Probability and statistics"
+            or problem == "Linear algebra"
+            or problem == "Linguistic problem with relations"
+        ):
+            UserMessage("This feature is not yet implemented.", raise_with=NotImplementedError)
         else:
             return "Sorry, something went wrong. Please check if your backend is available and try again or report an issue to the devs. :("
         return None
@@ -271,8 +299,8 @@ def process_query(args) -> None:

 def run() -> None:
     # All the logic of argparse goes in this function
-    parser = argparse.ArgumentParser(description=
-    parser.add_argument(
+    parser = argparse.ArgumentParser(description="Welcome to the Symbolic<AI/> Shell support tool!")
+    parser.add_argument("query", type=str, help="The prompt for the shell query.")

     args = parser.parse_args()
     process_query(args)
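Most of the solver.py hunks reformat `@core.few_shot(...)` blocks from collapsed one-liners into multi-line keyword arguments; the decorated inner function stays an empty stub whose result is supplied by the engine. A minimal sketch of that pattern follows; the `IsFormula` class and the import paths are assumptions inferred from the package layout, not code from the package.

# Illustrative only: IsFormula is a made-up class; imports are inferred from the
# package layout (symai/core.py, symai/symbol.py, symai/prompts.py, ...).
from symai import core
from symai.post_processors import StripPostProcessor
from symai.prompts import Prompt
from symai.symbol import Expression


class IsFormula(Expression):
    def forward(self, **kwargs) -> str:
        # The decorator wires the prompt, the few-shot examples, and the stop token
        # into a neuro-symbolic call; the stub below never executes its own body.
        @core.few_shot(
            prompt="Is the following statement in an explicit formula form?:\n",
            examples=Prompt(
                [
                    "$> 2 + 2 * 2 =>True EOF",
                    "$> x is seven =>False EOF",
                ]
            ),
            post_processors=[StripPostProcessor()],
            stop=["EOF"],
            **kwargs,
        )
        def _func(_) -> str:
            pass

        return _func(self)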
symai/extended/summarizer.py
CHANGED
@@ -1,4 +1,3 @@
-
 from ..components import Clean, Outline, Sequence, Stream, Translate
 from ..symbol import Expression, Symbol

@@ -9,12 +8,14 @@ class Summarizer(Expression):
             filters = []
         super().__init__(**kwargs)
         filters = filters if isinstance(filters, (list, tuple)) else [filters]
-        self.data_stream = Stream(
-
-
-
-
-
+        self.data_stream = Stream(
+            Sequence(
+                Clean(),
+                Translate(),
+                Outline(),
+                *filters,
+            )
+        )

     def forward(self, sym: Symbol, **kwargs) -> Symbol:
         vals = list(self.data_stream(sym, **kwargs))
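The Summarizer change only re-indents the Stream/Sequence pipeline built in the constructor. A short usage sketch of that pipeline, based on the `self.data_stream(sym, **kwargs)` call visible in `forward`; the sample input is made up and running it requires a configured symai backend.

from symai.components import Clean, Outline, Sequence, Stream, Translate
from symai.symbol import Symbol

# Each streamed chunk of the input is cleaned, translated, and outlined in order,
# mirroring the data_stream the Summarizer constructor assembles (without extra filters).
data_stream = Stream(Sequence(Clean(), Translate(), Outline()))

for chunk in data_stream(Symbol("A long document to be summarized ...")):
    print(chunk)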
symai/extended/taypan_interpreter.py
CHANGED
@@ -9,15 +9,12 @@ from ..symbol import Expression, Symbol
 def create_template():
     package_path = pathlib.Path(__file__).parent.absolute()

-
-    with (package_path / 'symbol.py').open() as f:
+    with (package_path / "symbol.py").open() as f:
         SYMBOL_API = f.read()

-
-    with (package_path / 'components.py').open() as f:
+    with (package_path / "components.py").open() as f:
         COMPONENTS_API = f.read()

-
     return f"""[Description]
 You are a programming language re-writing system from Taypan (high-level general-purpose programming language based on neuro-symbolic virtual machine) to Python interpreter, analogous to the relation between Scala and Java is the relation of Taypan to Python.

@@ -113,7 +110,7 @@ def create_template():

 class TaypanPreProcessor(PreProcessor):
     def __call__(self, argument):
-        return f
+        return f"```taypan\n{argument.args[0]!s}\n =>"


 class TaypanInterpreter(Expression):
@@ -127,9 +124,13 @@ class TaypanInterpreter(Expression):
         self.description = create_template()

     def forward(self, sym: Symbol, **kwargs) -> Symbol:
-        @zero_shot(
-
-
+        @zero_shot(
+            prompt="Translate the Taypan code to Python code:\n",
+            pre_processors=[TaypanPreProcessor()],
+            post_processors=[CodeExtractPostProcessor()],
+            **kwargs,
+        )
         def _func(_, text) -> str:
             pass
+
         return _func(self, sym)
symai/extended/vectordb.py
CHANGED
@@ -21,8 +21,8 @@ from .metrics import (
     ranking_algorithm_sort,
 )

-logging.getLogger(
-logging.getLogger(
+logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
+logging.getLogger("datasets").setLevel(logging.WARNING)


 class VectorDB(Expression):
@@ -35,6 +35,7 @@ class VectorDB(Expression):
     _default_top_k: ClassVar[int] = 5
     _default_storage_path: ClassVar[Path] = HOME_PATH / "localdb"
     _default_index_name: ClassVar[str] = "dataindex"
+
     def __init__(
         self,
         documents=_default_documents,
@@ -46,7 +47,7 @@ class VectorDB(Expression):
         index_dims=_default_index_dims,
         top_k=_default_top_k,
         index_name=_default_index_name,
-        **kwargs
+        **kwargs,
     ):
         super().__init__(**kwargs)
         self.config = deepcopy(SYMAI_CONFIG)
@@ -77,7 +78,10 @@ class VectorDB(Expression):
         elif "adams" in similarity_metric:
             self.similarity_metric = adams_similarity
         else:
-            UserMessage(
+            UserMessage(
+                "Similarity metric not supported. Please use either 'dot', 'cosine', 'euclidean', 'adams', or 'derrida'.",
+                raise_with=ValueError,
+            )

         if load_on_init:
             if isinstance(load_on_init, (str, Path)):
@@ -87,8 +91,11 @@ class VectorDB(Expression):
                 self.load()

     def _init_embedding_model(self):
-        if
-            self.
+        if (
+            self.config["EMBEDDING_ENGINE_API_KEY"] is None
+            or self.config["EMBEDDING_ENGINE_API_KEY"] == ""
+        ):
+            self.model = Interface("ExtensityAI/embeddings") # default to local model
         else:
             self.model = lambda x: Symbol(x).embedding

@@ -158,7 +165,10 @@ class VectorDB(Expression):
         if len(documents) == 0:
             return []
         texts = self._to_texts(documents, key)
-        batches = [
+        batches = [
+            texts[index : index + self.batch_size]
+            for index in range(0, len(texts), self.batch_size)
+        ]
         embeddings = []
         for batch in batches:
             embeddings.extend(self._embed_batch(batch))
@@ -186,8 +196,7 @@ class VectorDB(Expression):
             )
         ]
         return [
-            {"document": document, "index": index}
-            for index, document in enumerate(self.documents)
+            {"document": document, "index": index} for index, document in enumerate(self.documents)
         ]

     def add(self, documents, vectors=None):
@@ -222,7 +231,7 @@ class VectorDB(Expression):
             A vector to add to the database.

         """
-        vector =
+        vector = vector if vector is not None else self.embedding_function([document])[0]
         if self.vectors is None:
             self.vectors = np.empty((0, len(vector)), dtype=np.float32)
         elif len(vector) != self.vectors.shape[1]:
@@ -269,7 +278,7 @@ class VectorDB(Expression):
         Clears the database.

         """
-        self.vectors
+        self.vectors = None
         self.documents = []

     def save(self, storage_file: str | None = None):
@@ -296,7 +305,7 @@ class VectorDB(Expression):
         with storage_file.open("wb") as f:
             pickle.dump(data, f)

-    def load(self, storage_file
+    def load(self, storage_file: str | None = None):
         """
         Loads the database from a file.

@@ -326,7 +335,7 @@ class VectorDB(Expression):
         self.vectors = data["vectors"].astype(np.float32) if data["vectors"] is not None else None
         self.documents = data["documents"]

-    def purge(self, index_name
+    def purge(self, index_name: str):
         """
         Purges the database file from your machine, but does not delete the database from memory.
         Use the `clear` method to clear the database from memory.
@@ -371,7 +380,9 @@ class VectorDB(Expression):
            A list of results.

         """
-        assert self.vectors is not None,
+        assert self.vectors is not None, (
+            "Error: Cannot query the database without prior insertion / initialization."
+        )
         top_k = top_k or self.index_top_k
         query_vector = self.embedding_function([query])[0] if vector is None else vector
         if isinstance(query_vector, list):
@@ -380,5 +391,7 @@ class VectorDB(Expression):
             self.vectors, query_vector, top_k=top_k, metric=self.similarity_metric
         )
         if return_similarities:
-            return list(
+            return list(
+                zip([self.documents[index] for index in ranked_results], similarities, strict=False)
+            )
         return [self.documents[index] for index in ranked_results]
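The VectorDB hunks are formatting-only as well (wrapped conditionals, list comprehensions, and restored assignments). A hypothetical usage sketch based on the methods visible above; the call syntax for querying and the `load_on_init` keyword are assumptions for illustration.

# Hypothetical usage: add(), save(), load(), and the top_k / return_similarities
# parameters appear in the diff; the query call below is an assumed entry point.
from symai.extended.vectordb import VectorDB

db = VectorDB(index_name="dataindex", top_k=5)        # mirrors the ClassVar defaults above
db.add(["symbolic programming", "vector databases"])  # embeds and stores the documents
db.save()                                             # pickles vectors and documents to the storage path

db = VectorDB(load_on_init="dataindex")               # assumed: load an existing index at startup
hits = db("vector search", top_k=3, return_similarities=True)  # assumed query entry point
for document, score in hits:                          # (document, similarity) pairs per the zip above
    print(score, document)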
symai/formatter/formatter.py
CHANGED
@@ -21,16 +21,16 @@ class ParagraphFormatter(Expression):

     def split_files(self, input_text=""):
         input_ = input_text.strip()
-        if input_.startswith(
+        if input_.startswith("# ----[FILE_START]") and "# ----[FILE_END]" in input_:
             self._has_file_start = True
             # split text file-wise and create a map of file names and their contents
             files = {}
-            split_text = input_.split(
+            split_text = input_.split("# ----[FILE_START]")
             for _i, file in enumerate(split_text):
                 if not file.strip():
                     continue
-                _, content_file = file.split(
-                content, file_name = content_file.split(
+                _, content_file = file.split("[FILE_CONTENT]:")
+                content, file_name = content_file.split("# ----[FILE_END]")
                 files[file_name.strip()] = content.strip()
         else:
             files = {"": input_}
@@ -40,8 +40,10 @@ class ParagraphFormatter(Expression):
         if file_name and self._has_file_start:
             header = f"# ----[FILE_START]<PART{part}/{total_parts}>{file_name}[FILE_CONTENT]:\n"
             footer = f"\n# ----[FILE_END]{file_name}\n"
-            if
-
+            if (
+                "[FILE_CONTENT]:" in paragraph
+            ): # TODO: remove this if statement after fixing the bug
+                paragraph = paragraph.split("[FILE_CONTENT]:")[-1].strip()
             paragraph = header + paragraph + footer
         return paragraph

@@ -67,7 +69,12 @@ class ParagraphFormatter(Expression):
         input_ = file_content.strip()
         split_text = self.NEWLINES_RE.split(input_)

-        par = [
+        par = [
+            self._add_header_footer(p, file_name, part=i + 1, total_parts=len(split_text))
+            + "\n"
+            for i, p in enumerate(split_text)
+            if p.strip()
+        ]
         # p + "\n" ensures that all lines in the paragraph end with a newline
         # p.strip() == True if paragraph has other characters than whitespace

@@ -85,14 +92,20 @@ class ParagraphFormatter(Expression):
                 # n splits
                 total_parts = (len(words) // max_length + 1) * self._get_total_parts(text)
                 for p, i in enumerate(range(0, len(words), max_length)):
-                    paragraph =
-                    paragraphs.append(
+                    paragraph = " ".join(words[i : i + max_length])
+                    paragraphs.append(
+                        self._add_header_footer(
+                            paragraph, file_name, part=p + 1, total_parts=total_parts
+                        )
+                        + "\n"
+                    )
             else:
                 paragraphs.append(text)
         return paragraphs

-    @core_ext.bind(engine=
-    def _max_tokens(self):
+    @core_ext.bind(engine="embedding", property="max_tokens")
+    def _max_tokens(self):
+        pass

     def split_max_tokens_exceeded(self, input_text: List[str], token_ratio=0.5):
         paragraphs = []
@@ -107,8 +120,13 @@ class ParagraphFormatter(Expression):
                 text_len_ = len(str(text)) // splits_
                 total_parts = (text_len_ + 1) * self._get_total_parts(text)
                 for i in range(splits_):
-                    paragraph = text[i * text_len_:(i + 1) * text_len_]
-                    paragraphs.append(
+                    paragraph = text[i * text_len_ : (i + 1) * text_len_]
+                    paragraphs.append(
+                        self._add_header_footer(
+                            paragraph, file_name, part=i + 1, total_parts=total_parts
+                        )
+                        + "\n"
+                    )
             else:
                 paragraphs.append(text)
         return paragraphs
@@ -126,7 +144,9 @@ class ParagraphFormatter(Expression):
 class SentenceFormatter(Expression):
     def __init__(self, value=None, **kwargs):
         super().__init__(value, **kwargs)
-        self.SENTENCES_RE = re.compile(
+        self.SENTENCES_RE = re.compile(
+            r"[.!?]\n*|[\n]{1,}"
+        ) # Sentence ending characters followed by newlines

     def split_sentences(self, input_text=""):
         input_ = input_text.strip()
@@ -161,13 +181,7 @@ class RegexFormatter(Expression):


 class TextContainerFormatter(Expression):
-    def __init__(
-        self,
-        value: Any = None,
-        key: str ="text",
-        text_split: int = 4,
-        **kwargs
-    ):
+    def __init__(self, value: Any = None, key: str = "text", text_split: int = 4, **kwargs):
         super().__init__(value, **kwargs)
         self.key = key
         self.text_split = text_split
@@ -179,7 +193,7 @@ class TextContainerFormatter(Expression):
         chunks = [text for container in tqdm(containers) for text in self._chunk(container)]
         return self._to_symbol(chunks)

-    def _chunk(self, container:
+    def _chunk(self, container: "TextContainer") -> List[str]:
         text = container.text
         step = len(text) // self.text_split
         splits = []
@@ -189,16 +203,10 @@ class TextContainerFormatter(Expression):
                 # Unify the last chunk with the previous one if necessary
                 splits.append(self._as_str(text[i:], container))
                 break
-            splits.append(self._as_str(text[i:i+step], container))
+            splits.append(self._as_str(text[i : i + step], container))
             i += step
             c += 1
         return splits

-    def _as_str(self, text: str, container:
-        return
-            '---\n'
-            f"id: {container.id}\n"
-            f"page: {container.page}\n"
-            '---\n'
-            f"{text}"
-        )
+    def _as_str(self, text: str, container: "TextContainer") -> str:
+        return f"---\nid: {container.id}\npage: {container.page}\n---\n{text}"