symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +96 -64
- symai/backend/base.py +93 -80
- symai/backend/engines/drawing/engine_bfl.py +12 -11
- symai/backend/engines/drawing/engine_gpt_image.py +108 -87
- symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
- symai/backend/engines/embedding/engine_openai.py +3 -5
- symai/backend/engines/execute/engine_python.py +6 -5
- symai/backend/engines/files/engine_io.py +74 -67
- symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
- symai/backend/engines/index/engine_pinecone.py +23 -24
- symai/backend/engines/index/engine_vectordb.py +16 -14
- symai/backend/engines/lean/engine_lean4.py +38 -34
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
- symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
- symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
- symai/backend/engines/ocr/engine_apilayer.py +6 -8
- symai/backend/engines/output/engine_stdout.py +1 -4
- symai/backend/engines/search/engine_openai.py +7 -7
- symai/backend/engines/search/engine_perplexity.py +5 -5
- symai/backend/engines/search/engine_serpapi.py +12 -14
- symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
- symai/backend/engines/text_to_speech/engine_openai.py +5 -7
- symai/backend/engines/text_vision/engine_clip.py +7 -11
- symai/backend/engines/userinput/engine_console.py +3 -3
- symai/backend/engines/webscraping/engine_requests.py +81 -48
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +4 -2
- symai/backend/mixin/deepseek.py +2 -0
- symai/backend/mixin/google.py +2 -0
- symai/backend/mixin/openai.py +11 -3
- symai/backend/settings.py +83 -16
- symai/chat.py +101 -78
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +77 -69
- symai/collect/pipeline.py +35 -27
- symai/collect/stats.py +75 -63
- symai/components.py +198 -169
- symai/constraints.py +15 -12
- symai/core.py +698 -359
- symai/core_ext.py +32 -34
- symai/endpoints/api.py +80 -73
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +11 -8
- symai/extended/arxiv_pdf_parser.py +13 -12
- symai/extended/bibtex_parser.py +2 -3
- symai/extended/conversation.py +101 -90
- symai/extended/document.py +17 -10
- symai/extended/file_merger.py +18 -13
- symai/extended/graph.py +18 -13
- symai/extended/html_style_template.py +2 -4
- symai/extended/interfaces/blip_2.py +1 -2
- symai/extended/interfaces/clip.py +1 -2
- symai/extended/interfaces/console.py +7 -1
- symai/extended/interfaces/dall_e.py +1 -1
- symai/extended/interfaces/flux.py +1 -1
- symai/extended/interfaces/gpt_image.py +1 -1
- symai/extended/interfaces/input.py +1 -1
- symai/extended/interfaces/llava.py +0 -1
- symai/extended/interfaces/naive_vectordb.py +7 -8
- symai/extended/interfaces/naive_webscraping.py +1 -1
- symai/extended/interfaces/ocr.py +1 -1
- symai/extended/interfaces/pinecone.py +6 -5
- symai/extended/interfaces/serpapi.py +1 -1
- symai/extended/interfaces/terminal.py +2 -3
- symai/extended/interfaces/tts.py +1 -1
- symai/extended/interfaces/whisper.py +1 -1
- symai/extended/interfaces/wolframalpha.py +1 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +11 -13
- symai/extended/os_command.py +17 -16
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +19 -16
- symai/extended/packages/sympkg.py +12 -9
- symai/extended/packages/symrun.py +21 -19
- symai/extended/repo_cloner.py +11 -10
- symai/extended/seo_query_optimizer.py +1 -2
- symai/extended/solver.py +20 -23
- symai/extended/summarizer.py +4 -3
- symai/extended/taypan_interpreter.py +10 -12
- symai/extended/vectordb.py +99 -82
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +12 -16
- symai/formatter/regex.py +62 -63
- symai/functional.py +176 -122
- symai/imports.py +136 -127
- symai/interfaces.py +56 -27
- symai/memory.py +14 -13
- symai/misc/console.py +49 -39
- symai/misc/loader.py +5 -3
- symai/models/__init__.py +17 -1
- symai/models/base.py +269 -181
- symai/models/errors.py +0 -1
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +11 -15
- symai/ops/primitives.py +348 -228
- symai/post_processors.py +32 -28
- symai/pre_processors.py +39 -41
- symai/processor.py +6 -4
- symai/prompts.py +59 -45
- symai/server/huggingface_server.py +23 -20
- symai/server/llama_cpp_server.py +7 -5
- symai/shell.py +3 -4
- symai/shellsv.py +499 -375
- symai/strategy.py +517 -287
- symai/symbol.py +111 -116
- symai/utils.py +42 -36
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
- symbolicai-1.0.0.dist-info/RECORD +163 -0
- symbolicai-0.20.2.dist-info/RECORD +0 -162
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
symai/extended/repo_cloner.py
CHANGED

@@ -1,9 +1,10 @@
 from pathlib import Path
-
+
 from git import Repo
 
-from ..symbol import Expression
 from ..backend.settings import HOME_PATH
+from ..symbol import Expression
+from ..utils import UserMessage
 
 
 class RepositoryCloner(Expression):
@@ -16,7 +17,7 @@ class RepositoryCloner(Expression):
        repo_path (Optional[str]): The path where to clone the repository.
        By default it will be at '~/.symai/repos/'.
    """
-    def __init__(self, repo_path:
+    def __init__(self, repo_path: str | None = None, **kwargs):
        super().__init__(**kwargs)
        self.repo_dir = HOME_PATH / 'repos/' if repo_path is None else Path(repo_path)
        if not self.repo_dir.exists():
@@ -35,24 +36,24 @@
        """
        repo_name = url.split('/')[-1].replace('.git', '')
        if (self.repo_dir / repo_name).is_dir():
-
+            UserMessage(f'Repository {repo_name} already exists. Checking for updates...')
            try:
                repo = Repo(self.repo_dir / repo_name)
                current = repo.head.commit
                repo.remotes.origin.pull()
                if current != repo.head.commit:
-
+                    UserMessage(f'Repository {repo_name} updated.')
                else:
-
+                    UserMessage(f'Repository {repo_name} is up-to-date.')
            except Exception as e:
-
+                UserMessage(f'An error occurred: {e}')
                raise e
        else:
-
+            UserMessage(f'Cloning repository {repo_name}...')
            try:
                Repo.clone_from(url, self.repo_dir / repo_name)
-
+                UserMessage(f'Repository {repo_name} cloned successfully.')
            except Exception as e:
-
+                UserMessage(f'Failed to clone the repository. An error occurred: {e}')
                raise e
        return str(self.repo_dir / repo_name)
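Throughout this release, direct print(...) reporting is replaced by a UserMessage helper imported from symai.utils. The helper's own implementation is not part of this diff; a minimal sketch consistent with the two call shapes visible across the touched files (a bare message, and a message with raise_with=SomeError) might look like this, and may differ from the real code:

# Hypothetical sketch of the UserMessage call contract inferred from the
# call sites in this diff; the actual symai/utils.py implementation may differ.
def UserMessage(message: str, raise_with: type | None = None) -> None:
    print(message)  # the real helper presumably routes through symai's console utilities
    if raise_with is not None:
        raise raise_with(message)  # e.g. raise_with=ValueError, as used in vectordb.py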
symai/extended/seo_query_optimizer.py
CHANGED

@@ -4,7 +4,6 @@ from ..pre_processors import PreProcessor
 from ..prompts import Prompt
 from ..symbol import Expression, Symbol
 
-
 SEO_OPTIMIZER_DESCRIPTION = """[Description]
 You are a SEO query optimizer. You are given a list of queries, phrases or sentences and you need to optimize them for search engines.
 Assume your search engines are based on vector databases and contain indices of GitHub repositories, papers and other resources.
@@ -17,7 +16,7 @@ The number of resulting queries should be between 1 and 8 statements separated by
 
 class SEOQueryOptimizerPreProcessor(PreProcessor):
    def __call__(self, argument):
-        return '$> {
+        return f'$> {argument.args[0]!s} =>'
 
 
 class SEOQueryOptimizer(Expression):
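As in the other pre-processors touched by this release, a str.format-style template (truncated in this view) is replaced by an explicit f-string over the engine argument. A standalone illustration of the new pattern, where the argument object is a stand-in built purely for demonstration:

from types import SimpleNamespace

# Stand-in for the `argument` object passed to PreProcessor.__call__.
argument = SimpleNamespace(args=["open source neuro-symbolic frameworks"])

# 1.0 pattern from the diff: !s forces str() conversion inside the f-string.
prompt = f'$> {argument.args[0]!s} =>'
assert prompt == '$> open source neuro-symbolic frameworks =>'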
symai/extended/solver.py
CHANGED

@@ -1,16 +1,18 @@
 import argparse
+
 try:
    import z3
 except ImportError:
    z3 = None
 
-from .conversation import Conversation
 from .. import core
 from ..components import Execute
-from ..post_processors import
+from ..post_processors import CodeExtractPostProcessor, StripPostProcessor
 from ..pre_processors import PreProcessor
 from ..prompts import Prompt
 from ..symbol import Expression, Symbol
+from ..utils import UserMessage
+from .conversation import Conversation
 
 #############################################################################################
 #
@@ -57,15 +59,17 @@ $> Max is 2 years older than his brother. In 5 years, Max will be 3 times as old
 
 class ProblemClassifierPreProcessor(PreProcessor):
    def __call__(self, argument):
-        return '$> {}\n//'
+        return f'$> {argument.prop.instance!s}\n//'
 
 
 class OptionsPreProcessor(PreProcessor):
    def __call__(self, argument):
-        return '$> :{}: == :{
+        return f'$> :{argument.prop.instance!s}: == :{argument.args[0]!s}: =>'
 
 
 class ProblemClassifier(Expression):
+    __hash__ = Expression.__hash__
+
    @property
    def static_context(self):
        return PROBLEM_CATEGORY_CONTEXT
@@ -105,7 +109,7 @@ class ProblemClassifier(Expression):
 
 class FormulaCheckerPreProcessor(PreProcessor):
    def __call__(self, argument):
-        return '$> {
+        return f'$> {argument.prop.instance!s} =>'
 
 
 class FormulaChecker(Expression):
@@ -145,7 +149,7 @@ class FormulaChecker(Expression):
 
 class FormulaWriterPreProcessor(PreProcessor):
    def __call__(self, argument):
-        return '$> {
+        return f'$> {argument.prop.instance!s} =>'
 
 
 class FormulaWriter(Expression):
@@ -212,9 +216,8 @@ class SATSolver(Expression):
            m = S.model()
            # Return the solution
            return m[query]
-
-
-        return None
+        UserMessage("Cannot solve the puzzle. Returned: " + str(r))
+        return None
 
 
 #############################################################################################
@@ -244,32 +247,26 @@ class Solver(Expression):
        classifier = ProblemClassifier(sym)
        problem = classifier(**kwargs)
 
-        if 'Arithmetics formula' ==
-            formula = self.rewrite_formula(sym, **kwargs)
-            print(formula)
-        elif 'Equations' == problem:
+        if problem == 'Arithmetics formula' or problem == 'Equations':
            formula = self.rewrite_formula(sym, **kwargs)
-
-        elif 'Implication and logical expressions'
+            UserMessage(str(formula))
+        elif problem == 'Implication and logical expressions':
            res = self.conv(sym, **kwargs)
            code = self.pp(str(res), None, tag="python")
            formula = self.solver(code, lambda: 'German')
-
-        elif 'Probability and statistics' == problem:
-
-        elif 'Linear algebra' == problem:
-            raise NotImplementedError('This feature is not yet implemented.')
-        elif 'Linguistic problem with relations' == problem:
-            raise NotImplementedError('This feature is not yet implemented.')
+            UserMessage(str(formula))
+        elif problem == 'Probability and statistics' or problem == 'Linear algebra' or problem == 'Linguistic problem with relations':
+            UserMessage('This feature is not yet implemented.', raise_with=NotImplementedError)
        else:
            return "Sorry, something went wrong. Please check if your backend is available and try again or report an issue to the devs. :("
+        return None
 
 
 def process_query(args) -> None:
    query = args.query
    solver = Solver()
    res = solver(query)
-
+    UserMessage(str(res))
 
 
 def run() -> None:
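Beyond folding the duplicated not-implemented branches into a single UserMessage(..., raise_with=NotImplementedError) call, ProblemClassifier now pins __hash__ = Expression.__hash__, presumably because Expression overloads __eq__ for symbolic comparison. In Python, defining __eq__ in a class body without also defining __hash__ implicitly sets __hash__ to None, making instances unhashable; re-assigning the parent's __hash__ restores it. A self-contained demonstration of the mechanism with generic classes (not symai code):

class Base:
    def __hash__(self):
        return id(self)

class Unhashable(Base):
    def __eq__(self, other):  # redefining __eq__ implicitly sets __hash__ = None
        return NotImplemented

class Hashable(Base):
    __hash__ = Base.__hash__  # explicit restore, as ProblemClassifier now does
    def __eq__(self, other):
        return NotImplemented

assert Unhashable.__hash__ is None          # instances raise TypeError in hash()
assert isinstance(hash(Hashable()), int)    # hashability restored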
symai/extended/summarizer.py
CHANGED

@@ -1,13 +1,14 @@
-from typing import List
 
 from ..components import Clean, Outline, Sequence, Stream, Translate
 from ..symbol import Expression, Symbol
 
 
 class Summarizer(Expression):
-    def __init__(self, filters:
+    def __init__(self, filters: list[Expression] | None = None, **kwargs):
+        if filters is None:
+            filters = []
        super().__init__(**kwargs)
-        filters = filters if isinstance(filters,
+        filters = filters if isinstance(filters, (list, tuple)) else [filters]
        self.data_stream = Stream(Sequence(
            Clean(),
            Translate(),
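The new signature swaps what was presumably a mutable default (the old line is truncated in this view) for the None-sentinel idiom, Python's standard guard against a default object being shared across calls:

def bad(filters=[]):   # a single list object is shared by every call
    filters.append("translate")
    return filters

def good(filters=None):  # fresh list per call, mirroring Summarizer.__init__ in 1.0
    if filters is None:
        filters = []
    filters.append("translate")
    return filters

assert bad() == ["translate"] and bad() == ["translate", "translate"]  # state leaks
assert good() == ["translate"] and good() == ["translate"]             # it does not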
symai/extended/taypan_interpreter.py
CHANGED

@@ -1,25 +1,24 @@
-import os
 import pathlib
 
-from ..core import
-from ..pre_processors import PreProcessor
+from ..core import zero_shot
 from ..post_processors import CodeExtractPostProcessor
+from ..pre_processors import PreProcessor
 from ..symbol import Expression, Symbol
 
 
 def create_template():
-    package_path =
+    package_path = pathlib.Path(__file__).parent.absolute()
 
 
-    with
+    with (package_path / 'symbol.py').open() as f:
        SYMBOL_API = f.read()
 
 
-    with
+    with (package_path / 'components.py').open() as f:
        COMPONENTS_API = f.read()
 
 
-
+    return f"""[Description]
 You are a programming language re-writing system from Taypan (high-level general-purpose programming language based on neuro-symbolic virtual machine) to Python interpreter, analogous to the relation between Scala and Java is the relation of Taypan to Python.
 
 All code from Python is valid Taypan code, but not all code from Taypan is valid Python code.
@@ -104,18 +103,17 @@ def create_template():
 [SymboliAI API]
 
 - components `from symai.components import *`:
-{
+{COMPONENTS_API}
 
 - symbol `from symai.symbol import *`:
-{
+{SYMBOL_API}
 
-"""
-    return TAYPAN_DESCRIPTION
+"""
 
 
 class TaypanPreProcessor(PreProcessor):
    def __call__(self, argument):
-        return '```taypan\n{}\n =>'
+        return f'```taypan\n{argument.args[0]!s}\n =>'
 
 
 class TaypanInterpreter(Expression):
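create_template() now builds and opens its paths with pathlib instead of os.path.join plus the open() builtin, and returns the f-string template directly rather than assigning it to a named constant first. The same file-reading pattern, made self-contained with a temporary directory:

import pathlib
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    package_path = pathlib.Path(tmp)
    (package_path / 'symbol.py').write_text('# stub API surface')

    # 1.0 style: Path arithmetic plus Path.open, as in create_template()
    with (package_path / 'symbol.py').open() as f:
        SYMBOL_API = f.read()

assert SYMBOL_API == '# stub API surface'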
symai/extended/vectordb.py
CHANGED

@@ -1,34 +1,40 @@
 import gzip
 import logging
-import os
 import pickle
+from collections.abc import Mapping
 from copy import deepcopy
 from pathlib import Path
+from typing import Any, ClassVar
 
 import numpy as np
 
 from ..backend.settings import HOME_PATH, SYMAI_CONFIG
 from ..interfaces import Interface
 from ..symbol import Expression, Symbol
-from ..utils import
-from .metrics import (
-
-
+from ..utils import UserMessage
+from .metrics import (
+    adams_similarity,
+    cosine_similarity,
+    derridaean_similarity,
+    dot_product,
+    euclidean_metric,
+    ranking_algorithm_sort,
+)
 
 logging.getLogger('sentence_transformers').setLevel(logging.WARNING)
 logging.getLogger('datasets').setLevel(logging.WARNING)
 
 
 class VectorDB(Expression):
-    _default_documents = []
-    _default_vectors = None
-    _default_batch_size = 2048
-    _default_similarity_metric = "cosine"
-    _default_embedding_function = None
-    _default_index_dims = 768
-    _default_top_k = 5
-    _default_storage_path =
-    _default_index_name = "dataindex"
+    _default_documents: ClassVar[list] = []
+    _default_vectors: ClassVar[np.ndarray | None] = None
+    _default_batch_size: ClassVar[int] = 2048
+    _default_similarity_metric: ClassVar[str] = "cosine"
+    _default_embedding_function: ClassVar[object | None] = None
+    _default_index_dims: ClassVar[int] = 768
+    _default_top_k: ClassVar[int] = 5
+    _default_storage_path: ClassVar[Path] = HOME_PATH / "localdb"
+    _default_index_name: ClassVar[str] = "dataindex"
    def __init__(
        self,
        documents=_default_documents,
@@ -71,12 +77,11 @@ class VectorDB(Expression):
        elif "adams" in similarity_metric:
            self.similarity_metric = adams_similarity
        else:
-
+            UserMessage("Similarity metric not supported. Please use either 'dot', 'cosine', 'euclidean', 'adams', or 'derrida'.", raise_with=ValueError)
 
        if load_on_init:
-
-
-            path = os.path.join(load_on_init, f"{self.index_name}.pkl")
+            if isinstance(load_on_init, (str, Path)):
+                path = Path(load_on_init) / f"{self.index_name}.pkl"
                self.load(path)
            else:
                self.load()
@@ -87,6 +92,52 @@
        else:
            self.model = lambda x: Symbol(x).embedding
 
+    def _unwrap_documents(self, documents):
+        if isinstance(documents, Symbol):
+            return documents.value
+        return documents
+
+    def _to_texts(self, documents, key):
+        if not isinstance(documents, list):
+            self._raise_texts_unassigned()
+        if len(documents) == 0:
+            return []
+        first_document = documents[0]
+        if isinstance(first_document, dict):
+            return self._texts_from_dicts(documents, key)
+        if isinstance(first_document, str):
+            return documents
+        return self._raise_texts_unassigned()
+
+    def _texts_from_dicts(self, documents, key):
+        if isinstance(key, str):
+            key_chain = key.split(".") if "." in key else [key]
+            return [self._resolve_key_chain(doc, key_chain).replace("\n", " ") for doc in documents]
+        if key is None:
+            return [
+                ", ".join([f"{dict_key}: {value}" for dict_key, value in doc.items()])
+                for doc in documents
+            ]
+        return self._raise_texts_unassigned()
+
+    def _resolve_key_chain(self, document, key_chain):
+        current_document = document
+        for chain_key in key_chain:
+            current_document = current_document[chain_key]
+        return current_document
+
+    def _embed_batch(self, batch):
+        emb = self.model(batch)
+        if len(emb.shape) == 1:
+            return [emb]
+        if len(emb.shape) == 2:
+            return [emb[index] for index in range(emb.shape[0])]
+        return UserMessage("Embeddings must be a 1D or 2D array.", raise_with=ValueError)
+
+    def _raise_texts_unassigned(self):
+        error_message = "local variable 'texts' referenced before assignment"
+        raise UnboundLocalError(error_message)
+
    def _get_embedding(self, documents, key=None):
        """
        Get embeddings from a list of documents.
@@ -103,48 +154,14 @@ class VectorDB(Expression):
        embeddings : numpy.ndarray
            A numpy array of embeddings.
        """
-
-        if isinstance(documents, Symbol):
-            documents = documents.value
-        # if the documents are a list of Symbols, unwrap them
+        documents = self._unwrap_documents(documents)
        if len(documents) == 0:
            return []
-
-
-        if isinstance(documents[0], dict):
-            texts = []
-            # If a key is specified, extract the text from the dictionary using the key
-            if isinstance(key, str):
-                if "." in key:
-                    key_chain = key.split(".")
-                else:
-                    key_chain = [key]
-                for doc in documents:
-                    for key in key_chain:
-                        doc = doc[key]
-                    texts.append(doc.replace("\n", " "))
-            # If no key is specified, extract the text from the dictionary using all keys
-            elif key is None:
-                for doc in documents:
-                    text = ", ".join([f"{key}: {value}" for key, value in doc.items()])
-                    texts.append(text)
-        # If the documents are a list of strings, use the strings as the documents
-        elif isinstance(documents[0], str):
-            texts = documents
-        # If the documents are a list of lists, use the lists as the documents
-        batches = [texts[i : i + self.batch_size] for i in range(0, len(texts), self.batch_size)]
+        texts = self._to_texts(documents, key)
+        batches = [texts[index : index + self.batch_size] for index in range(0, len(texts), self.batch_size)]
        embeddings = []
-        # Embed the documents in batches
        for batch in batches:
-
-            emb = self.model(batch)
-            if len(emb.shape) == 1:
-                embeddings.append(emb)
-            elif len(emb.shape) == 2:
-                for i in range(emb.shape[0]):
-                    embeddings.append(emb[i])
-            else:
-                CustomUserWarning("Embeddings must be a 1D or 2D array.", raise_with=ValueError)
+            embeddings.extend(self._embed_batch(batch))
        return embeddings
 
    def dict(self, vectors=False):
@@ -165,7 +182,7 @@ class VectorDB(Expression):
        return [
            {"document": document, "vector": vector.tolist(), "index": index}
            for index, (document, vector) in enumerate(
-                zip(self.documents, self.vectors)
+                zip(self.documents, self.vectors, strict=False)
            )
        ]
        return [
@@ -191,8 +208,9 @@ class VectorDB(Expression):
        if not isinstance(documents, list):
            return self.add_document(documents, vectors)
        self.add_documents(documents, vectors)
+        return None
 
-    def add_document(self, document:
+    def add_document(self, document: Mapping[str, Any], vector=None):
        """
        Adds a document to the database.
 
@@ -208,9 +226,9 @@ class VectorDB(Expression):
        if self.vectors is None:
            self.vectors = np.empty((0, len(vector)), dtype=np.float32)
        elif len(vector) != self.vectors.shape[1]:
-
+            UserMessage("All vectors must have the same length.", raise_with=ValueError)
        # convert the vector to a numpy array if it is not already
-        if
+        if isinstance(vector, list):
            vector = np.array(vector)
        self.vectors = np.vstack([self.vectors, vector]).astype(np.float32)
        self.documents.append(document)
@@ -243,7 +261,7 @@ class VectorDB(Expression):
        if not documents:
            return
        vectors = vectors or np.array(self.embedding_function(documents)).astype(np.float32)
-        for vector, document in zip(vectors, documents):
+        for vector, document in zip(vectors, documents, strict=False):
            self.add_document(document, vector)
 
    def clear(self):
@@ -254,7 +272,7 @@ class VectorDB(Expression):
        self.vectors = None
        self.documents = []
 
-    def save(self, storage_file: str = None):
+    def save(self, storage_file: str | None = None):
        """
        Saves the database to a file.
 
@@ -265,20 +283,20 @@ class VectorDB(Expression):
 
        """
        if storage_file is None:
-
-
-
-            storage_file =
+            storage_file = HOME_PATH / "localdb" / f"{self.index_name}.pkl"
+            storage_file.parent.mkdir(parents=True, exist_ok=True)
+        else:
+            storage_file = Path(storage_file)
 
        data = {"vectors": self.vectors, "documents": self.documents}
-        if storage_file.
+        if storage_file.suffix == ".gz":
            with gzip.open(storage_file, "wb") as f:
                pickle.dump(data, f)
        else:
-            with open(
+            with storage_file.open("wb") as f:
                pickle.dump(data, f)
 
-    def load(self, storage_file : str = None):
+    def load(self, storage_file : str | None = None):
        """
        Loads the database from a file.
 
@@ -289,21 +307,20 @@ class VectorDB(Expression):
 
        """
        if storage_file is None:
-
-
-
-
-            storage_file = os.path.join(storage_path, f"{self.index_name}.pkl")
+            storage_file = HOME_PATH / "localdb" / f"{self.index_name}.pkl"
+            storage_file.parent.mkdir(parents=True, exist_ok=True)
+        else:
+            storage_file = Path(storage_file)
 
        # return since nothing to load
-        if not
+        if not storage_file.exists():
            return
 
-        if storage_file.
+        if storage_file.suffix == ".gz":
            with gzip.open(storage_file, "rb") as f:
                data = pickle.load(f)
        else:
-            with open(
+            with storage_file.open("rb") as f:
                data = pickle.load(f)
 
        self.vectors = data["vectors"].astype(np.float32) if data["vectors"] is not None else None
@@ -328,11 +345,11 @@ class VectorDB(Expression):
        # use path to home directory by default
        storage_path = symai_folder / "localdb"
        # create dir on first load if never used
-
+        storage_path.mkdir(parents=True, exist_ok=True)
        storage_file = storage_path / f"{index_name}.pkl"
        if storage_file.exists():
            # remove the file
-
+            storage_file.unlink()
        self.clear()
 
    def forward(self, query=None, vector=None, top_k=None, return_similarities=True):
@@ -354,14 +371,14 @@ class VectorDB(Expression):
            A list of results.
 
        """
-        assert self.vectors is not None,
+        assert self.vectors is not None, "Error: Cannot query the database without prior insertion / initialization."
        top_k = top_k or self.index_top_k
        query_vector = self.embedding_function([query])[0] if vector is None else vector
-        if
+        if isinstance(query_vector, list):
            query_vector = np.array(query_vector)
        ranked_results, similarities = ranking_algorithm_sort(
            self.vectors, query_vector, top_k=top_k, metric=self.similarity_metric
        )
        if return_similarities:
-            return list(zip([self.documents[index] for index in ranked_results], similarities))
+            return list(zip([self.documents[index] for index in ranked_results], similarities, strict=False))
        return [self.documents[index] for index in ranked_results]
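The monolithic _get_embedding body is split into small helpers, and all storage paths move onto pathlib. The dotted-key extraction that the new _texts_from_dicts/_resolve_key_chain helpers implement can be exercised in isolation; the following mirrors the code added in this diff, with a made-up document as input:

def resolve_key_chain(document, key_chain):
    # same traversal as VectorDB._resolve_key_chain in this diff
    current_document = document
    for chain_key in key_chain:
        current_document = current_document[chain_key]
    return current_document

doc = {"meta": {"title": "SymbolicAI\nrelease notes"}}
key = "meta.title"
key_chain = key.split(".") if "." in key else [key]

assert resolve_key_chain(doc, key_chain).replace("\n", " ") == "SymbolicAI release notes"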
symai/formatter/__init__.py
CHANGED

@@ -1,2 +1,10 @@
+from .formatter import ParagraphFormatter, RegexFormatter, SentenceFormatter, TextContainerFormatter
 from .regex import CHUNK_REGEX
-
+
+__all__ = [
+    "CHUNK_REGEX",
+    "ParagraphFormatter",
+    "RegexFormatter",
+    "SentenceFormatter",
+    "TextContainerFormatter",
+]
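The package root now re-exports the formatter classes and pins __all__, so both star-imports and from symai.formatter import ParagraphFormatter resolve without reaching into the submodule. With the 1.0 wheel installed, the exported surface can be checked directly:

import symai.formatter as formatter

assert set(formatter.__all__) == {
    "CHUNK_REGEX",
    "ParagraphFormatter",
    "RegexFormatter",
    "SentenceFormatter",
    "TextContainerFormatter",
}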
symai/formatter/formatter.py
CHANGED

@@ -1,12 +1,16 @@
 import re
+from typing import TYPE_CHECKING
 
 from beartype import beartype
 from beartype.typing import Any, Dict, List
 from tqdm import tqdm
 
-from .regex import CHUNK_REGEX
 from .. import core_ext
 from ..symbol import Expression, Symbol
+from .regex import CHUNK_REGEX
+
+if TYPE_CHECKING:
+    from ..backend.engines.files.engine_io import TextContainer
 
 
 class ParagraphFormatter(Expression):
@@ -22,7 +26,7 @@ class ParagraphFormatter(Expression):
        # split text file-wise and create a map of file names and their contents
        files = {}
        split_text = input_.split('# ----[FILE_START]')
-        for
+        for _i, file in enumerate(split_text):
            if not file.strip():
                continue
            _, content_file = file.split('[FILE_CONTENT]:')
@@ -109,7 +113,7 @@ class ParagraphFormatter(Expression):
            paragraphs.append(text)
        return paragraphs
 
-    def forward(self, sym: Symbol, *
+    def forward(self, sym: Symbol, *_args, **_kwargs) -> Symbol:
        sym = self._to_symbol(sym)
        # split text paragraph-wise and index each paragraph separately
        self.elements = self.split_files(sym.value)
@@ -128,13 +132,9 @@ class SentenceFormatter(Expression):
        input_ = input_text.strip()
        split_text = self.SENTENCES_RE.split(input_) # regex splitting
 
-
-        # s.strip() + ".\n" ensures that all lines in the sentence end with a period and newline
-        # s.strip() == True if sentence has other characters than whitespace
+        return [s.strip() + ".\n" for s in split_text if s.strip()]
 
-
-
-    def forward(self, sym: Symbol, *args, **kwargs) -> Symbol:
+    def forward(self, sym: Symbol, *_args, **_kwargs) -> Symbol:
        sym = self._to_symbol(sym)
        # split text sentence-wise and index each sentence separately
        self.elements = self.split_sentences(sym.value)
@@ -151,12 +151,9 @@ class RegexFormatter(Expression):
        input_ = input_text.strip()
        split_text = self.SENTENCES_RE.split(input_) # regex splitting
 
-
-        # s.strip() == True if sentence has other characters than whitespace
-
-        return chunks
+        return [s.strip() for s in split_text if s.strip()]
 
-    def forward(self, sym: Symbol, *
+    def forward(self, sym: Symbol, *_args, **_kwargs) -> Symbol:
        sym = self._to_symbol(sym)
        # split text sentence-wise and index each sentence separately
        self.elements = self.split_sentences(sym.value)
@@ -176,7 +173,7 @@ class TextContainerFormatter(Expression):
        self.text_split = text_split
 
    @beartype
-    def forward(self, sym: Symbol, *
+    def forward(self, sym: Symbol, *_args, **_kwargs) -> Symbol:
        if isinstance(sym.value, list):
            containers = [container for pdf in sym.value for container in pdf]
            chunks = [text for container in tqdm(containers) for text in self._chunk(container)]
@@ -205,4 +202,3 @@
            '---\n'
            f"{text}"
        )
-
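The forward(...) signatures rename unused *args/**kwargs to *_args/**_kwargs, the conventional marker for intentionally unused parameters, and TextContainer is now pulled in under a TYPE_CHECKING guard: the name stays available to annotations and static checkers without paying for (or risking a cycle on) the runtime import. The general shape of the pattern, with a hypothetical helper function added purely for illustration:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Resolved only by static type checkers; never imported at runtime,
    # so heavy or circular dependencies stay out of module import time.
    from symai.backend.engines.files.engine_io import TextContainer

def first_chunk(container: TextContainer) -> str:
    # hypothetical helper, for illustration only
    return str(container)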