PyPI - symbolicai - Versions diffs - 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

symbolicai-1.0.0-py3-none-any.whl → symbolicai-1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (127)

symai/__init__.py +198 -134
symai/backend/base.py +51 -51
symai/backend/engines/drawing/engine_bfl.py +33 -33
symai/backend/engines/drawing/engine_gpt_image.py +4 -10
symai/backend/engines/embedding/engine_llama_cpp.py +50 -35
symai/backend/engines/embedding/engine_openai.py +22 -16
symai/backend/engines/execute/engine_python.py +16 -16
symai/backend/engines/files/engine_io.py +51 -49
symai/backend/engines/imagecaptioning/engine_blip2.py +27 -23
symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +53 -46
symai/backend/engines/index/engine_pinecone.py +116 -88
symai/backend/engines/index/engine_qdrant.py +1011 -0
symai/backend/engines/index/engine_vectordb.py +78 -52
symai/backend/engines/lean/engine_lean4.py +65 -25
symai/backend/engines/neurosymbolic/__init__.py +28 -28
symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +137 -135
symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +145 -152
symai/backend/engines/neurosymbolic/engine_cerebras.py +328 -0
symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +75 -49
symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +199 -155
symai/backend/engines/neurosymbolic/engine_groq.py +106 -72
symai/backend/engines/neurosymbolic/engine_huggingface.py +100 -67
symai/backend/engines/neurosymbolic/engine_llama_cpp.py +121 -93
symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +213 -132
symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +180 -137
symai/backend/engines/ocr/engine_apilayer.py +18 -20
symai/backend/engines/output/engine_stdout.py +9 -9
symai/backend/engines/{webscraping → scrape}/engine_requests.py +25 -11
symai/backend/engines/search/engine_openai.py +95 -83
symai/backend/engines/search/engine_parallel.py +665 -0
symai/backend/engines/search/engine_perplexity.py +40 -41
symai/backend/engines/search/engine_serpapi.py +33 -28
symai/backend/engines/speech_to_text/engine_local_whisper.py +37 -27
symai/backend/engines/symbolic/engine_wolframalpha.py +14 -8
symai/backend/engines/text_to_speech/engine_openai.py +15 -19
symai/backend/engines/text_vision/engine_clip.py +34 -28
symai/backend/engines/userinput/engine_console.py +3 -4
symai/backend/mixin/anthropic.py +48 -40
symai/backend/mixin/deepseek.py +4 -5
symai/backend/mixin/google.py +5 -4
symai/backend/mixin/groq.py +2 -4
symai/backend/mixin/openai.py +132 -110
symai/backend/settings.py +14 -14
symai/chat.py +164 -94
symai/collect/dynamic.py +13 -11
symai/collect/pipeline.py +39 -31
symai/collect/stats.py +109 -69
symai/components.py +556 -238
symai/constraints.py +14 -5
symai/core.py +1495 -1210
symai/core_ext.py +55 -50
symai/endpoints/api.py +113 -58
symai/extended/api_builder.py +22 -17
symai/extended/arxiv_pdf_parser.py +13 -5
symai/extended/bibtex_parser.py +8 -4
symai/extended/conversation.py +88 -69
symai/extended/document.py +40 -27
symai/extended/file_merger.py +45 -7
symai/extended/graph.py +38 -24
symai/extended/html_style_template.py +17 -11
symai/extended/interfaces/blip_2.py +1 -1
symai/extended/interfaces/clip.py +4 -2
symai/extended/interfaces/console.py +5 -3
symai/extended/interfaces/dall_e.py +3 -1
symai/extended/interfaces/file.py +2 -0
symai/extended/interfaces/flux.py +3 -1
symai/extended/interfaces/gpt_image.py +15 -6
symai/extended/interfaces/input.py +2 -1
symai/extended/interfaces/llava.py +1 -1
symai/extended/interfaces/{naive_webscraping.py → naive_scrape.py} +3 -2
symai/extended/interfaces/naive_vectordb.py +2 -2
symai/extended/interfaces/ocr.py +4 -2
symai/extended/interfaces/openai_search.py +2 -0
symai/extended/interfaces/parallel.py +30 -0
symai/extended/interfaces/perplexity.py +2 -0
symai/extended/interfaces/pinecone.py +6 -4
symai/extended/interfaces/python.py +2 -0
symai/extended/interfaces/serpapi.py +2 -0
symai/extended/interfaces/terminal.py +0 -1
symai/extended/interfaces/tts.py +2 -1
symai/extended/interfaces/whisper.py +2 -1
symai/extended/interfaces/wolframalpha.py +1 -0
symai/extended/metrics/__init__.py +1 -1
symai/extended/metrics/similarity.py +5 -2
symai/extended/os_command.py +31 -22
symai/extended/packages/symdev.py +39 -34
symai/extended/packages/sympkg.py +30 -27
symai/extended/packages/symrun.py +46 -35
symai/extended/repo_cloner.py +10 -9
symai/extended/seo_query_optimizer.py +15 -12
symai/extended/solver.py +104 -76
symai/extended/summarizer.py +8 -7
symai/extended/taypan_interpreter.py +10 -9
symai/extended/vectordb.py +28 -15
symai/formatter/formatter.py +39 -31
symai/formatter/regex.py +46 -44
symai/functional.py +184 -86
symai/imports.py +85 -51
symai/interfaces.py +1 -1
symai/memory.py +33 -24
symai/menu/screen.py +28 -19
symai/misc/console.py +27 -27
symai/misc/loader.py +4 -3
symai/models/base.py +147 -76
symai/models/errors.py +1 -1
symai/ops/__init__.py +1 -1
symai/ops/measures.py +17 -14
symai/ops/primitives.py +933 -635
symai/post_processors.py +28 -24
symai/pre_processors.py +58 -52
symai/processor.py +15 -9
symai/prompts.py +714 -649
symai/server/huggingface_server.py +115 -32
symai/server/llama_cpp_server.py +14 -6
symai/server/qdrant_server.py +206 -0
symai/shell.py +98 -39
symai/shellsv.py +307 -223
symai/strategy.py +135 -81
symai/symbol.py +276 -225
symai/utils.py +62 -46
{symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/METADATA +19 -9
symbolicai-1.1.0.dist-info/RECORD +168 -0
symbolicai-1.0.0.dist-info/RECORD +0 -163
{symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/WHEEL +0 -0
{symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/entry_points.txt +0 -0
{symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/licenses/LICENSE +0 -0
{symbolicai-1.0.0.dist-info → symbolicai-1.1.0.dist-info}/top_level.txt +0 -0

symai/components.py CHANGED

@@ -11,7 +11,11 @@ from string import ascii_lowercase, ascii_uppercase
 from threading import Lock
 from typing import TYPE_CHECKING, Union
+if TYPE_CHECKING:
+    from typing import Any
 import numpy as np
+from beartype import beartype
 from box import Box
 from loguru import logger
 from pyvis.network import Network
@@ -43,33 +47,42 @@ _DEFAULT_PARAGRAPH_FORMATTER = ParagraphFormatter()
 class GraphViz(Expression):
-    def __init__(self,
-                 notebook = True,
-                 cdn_resources = "remote",
-                 bgcolor = "#222222",
-                 font_color = "white",
-                 height = "750px",
-                 width = "100%",
-                 select_menu = True,
-                 filter_menu = True,
-                 **kwargs):
+    def __init__(
+        self,
+        notebook=True,
+        cdn_resources="remote",
+        bgcolor="#222222",
+        font_color="white",
+        height="750px",
+        width="100%",
+        select_menu=True,
+        filter_menu=True,
+        **kwargs,
+    ):
         super().__init__(**kwargs)
-        self.net  = Network(notebook=notebook,
-                            cdn_resources=cdn_resources,
-                            bgcolor=bgcolor,
-                            font_color=font_color,
-                            height=height,
-                            width=width,
-                            select_menu=select_menu,
-                            filter_menu=filter_menu)
+        self.net = Network(
+            notebook=notebook,
+            cdn_resources=cdn_resources,
+            bgcolor=bgcolor,
+            font_color=font_color,
+            height=height,
+            width=width,
+            select_menu=select_menu,
+            filter_menu=filter_menu,
+        )
     def forward(self, sym: Symbol, file_path: str, **_kwargs):
         nodes = [str(n) if n.value else n.__repr__(simplified=True) for n in sym.nodes]
-        edges = [(str(e[0]) if e[0].value else e[0].__repr__(simplified=True),
-                  str(e[1]) if e[1].value else e[1].__repr__(simplified=True)) for e in sym.edges]
+        edges = [
+            (
+                str(e[0]) if e[0].value else e[0].__repr__(simplified=True),
+                str(e[1]) if e[1].value else e[1].__repr__(simplified=True),
+            )
+            for e in sym.edges
+        ]
         self.net.add_nodes(nodes)
         self.net.add_edges(edges)
-        file_path = file_path if file_path.endswith('.html') else file_path + '.html'
+        file_path = file_path if file_path.endswith(".html") else file_path + ".html"
         return self.net.show(file_path)
@@ -109,12 +122,14 @@ class Try(Expression):
 class Lambda(Expression):
     def __init__(self, callable: Callable, **kwargs):
         super().__init__(**kwargs)
         def _callable(*args, **kwargs):
             kw = {
-                'args': args,
-                'kwargs': kwargs,
+                "args": args,
+                "kwargs": kwargs,
             }
             return callable(kw)
         self.callable: Callable = _callable
     def forward(self, *args, **kwargs) -> Symbol:
@@ -140,8 +155,8 @@ class Output(Expression):
         self.verbose: bool = verbose
     def forward(self, *args, **kwargs) -> Expression:
-        kwargs['verbose'] = self.verbose
-        kwargs['handler'] = self.handler
+        kwargs["verbose"] = self.verbose
+        kwargs["handler"] = self.handler
         return self.output(*args, expr=self.expr, **kwargs)
@@ -166,32 +181,34 @@ class Sequence(TrackerTraceable):
 class Parallel(Expression):
     def __init__(self, *expr: list[Expression | Callable], sequential: bool = False, **kwargs):
         super().__init__(**kwargs)
-        self.sequential: bool       = sequential
+        self.sequential: bool = sequential
         self.expr: list[Expression] = expr
-        self.results: list[Symbol]  = []
+        self.results: list[Symbol] = []
     def forward(self, *args, **kwargs) -> Symbol:
         # run in sequence
         if self.sequential:
             return [e(*args, **kwargs) for e in self.expr]
         # run in parallel
         @core_ext.parallel(self.expr)
         def _func(e, *args, **kwargs):
             return e(*args, **kwargs)
         self.results = _func(*args, **kwargs)
         # final result of the parallel execution
         return self._to_symbol(self.results)
-#@TODO: BinPacker(format="...") -> ensure that data packages form a "bin" that's consistent (e.g. never break a sentence in the middle)
+# @TODO: BinPacker(format="...") -> ensure that data packages form a "bin" that's consistent (e.g. never break a sentence in the middle)
 class Stream(Expression):
     def __init__(self, expr: Expression | None = None, retrieval: str | None = None, **kwargs):
         super().__init__(**kwargs)
-        self.char_token_ratio:    float = 0.6
+        self.char_token_ratio: float = 0.6
         self.expr: Expression | None = expr
-        self.retrieval:   str | None = retrieval
-        self._trace:               bool = False
-        self._previous_frame            = None
+        self.retrieval: str | None = retrieval
+        self._trace: bool = False
+        self._previous_frame = None
     def forward(self, sym: Symbol, **kwargs) -> Iterator:
         sym = self._to_symbol(sym)
@@ -213,17 +230,15 @@ class Stream(Expression):
                     raise_with=ValueError,
                 )
-        res = sym.stream(expr=self.expr,
-                         char_token_ratio=self.char_token_ratio,
-                         **kwargs)
+        res = sym.stream(expr=self.expr, char_token_ratio=self.char_token_ratio, **kwargs)
         if self.retrieval is not None:
             res = list(res)
-            if self.retrieval == 'all':
+            if self.retrieval == "all":
                 return res
-            if self.retrieval == 'longest':
+            if self.retrieval == "longest":
                 res = sorted(res, key=lambda x: len(x), reverse=True)
                 return res[0]
-            if self.retrieval == 'contains':
+            if self.retrieval == "contains":
                 return [r for r in res if self.expr in r]
             UserMessage(f"Invalid retrieval method: {self.retrieval}", raise_with=ValueError)
@@ -241,7 +256,7 @@ class Stream(Expression):
 class Trace(Expression):
     def __init__(self, expr: Expression | None = None, engines=None, **kwargs):
         if engines is None:
-            engines = ['all']
+            engines = ["all"]
         super().__init__(**kwargs)
         self.expr: Expression = expr
         self.engines: list[str] = engines
@@ -278,7 +293,7 @@ class Analyze(Expression):
 class Log(Expression):
     def __init__(self, expr: Expression | None = None, engines=None, **kwargs):
         if engines is None:
-            engines = ['all']
+            engines = ["all"]
         super().__init__(**kwargs)
         self.expr: Expression = expr
         self.engines: list[str] = engines
@@ -303,7 +318,12 @@ class Log(Expression):
 class Template(Expression):
-    def __init__(self, template: str = "<html><body>{{placeholder}}</body></html>", placeholder: str = '{{placeholder}}', **kwargs):
+    def __init__(
+        self,
+        template: str = "<html><body>{{placeholder}}</body></html>",
+        placeholder: str = "{{placeholder}}",
+        **kwargs,
+    ):
         super().__init__(**kwargs)
         self.placeholder = placeholder
         self.template_ = template
@@ -333,21 +353,25 @@ class RuntimeExpression(Expression):
         code = self._to_symbol(code)
         # declare the runtime expression from the code
         expr = self.runner(code)
         def _func(sym):
             # execute nested expression
-            return expr['locals']['_output_'](sym)
+            return expr["locals"]["_output_"](sym)
         return _func
 class Metric(Expression):
     def __init__(self, normalize: bool = False, eps: float = 1e-8, **kwargs):
         super().__init__(**kwargs)
-        self.normalize  = normalize
-        self.eps        = eps
+        self.normalize = normalize
+        self.eps = eps
     def forward(self, sym: Symbol, **_kwargs) -> Symbol:
         sym = self._to_symbol(sym)
-        assert sym.value_type is np.ndarray or sym.value_type is list, 'Metric can only be applied to numpy arrays or lists.'
+        assert sym.value_type is np.ndarray or sym.value_type is list, (
+            "Metric can only be applied to numpy arrays or lists."
+        )
         if sym.value_type is list:
             sym._value = np.array(sym.value)
         # compute normalization between 0 and 1
@@ -357,7 +381,7 @@ class Metric(Expression):
             elif len(sym.value.shape) == 2:
                 pass
             else:
-                UserMessage(f'Invalid shape: {sym.value.shape}', raise_with=ValueError)
+                UserMessage(f"Invalid shape: {sym.value.shape}", raise_with=ValueError)
             # normalize between 0 and 1 and sum to 1
             sym._value = np.exp(sym.value) / (np.exp(sym.value).sum() + self.eps)
         return sym
@@ -413,16 +437,16 @@ _output_ = _func()
     def forward(self, sym: Symbol, enclosure: bool = False, **kwargs) -> Symbol:
         if enclosure or self.enclosure:
-            lines = str(sym).split('\n')
-            lines = ['    ' + line for line in lines]
-            sym = '\n'.join(lines)
-            sym = self.template.replace('{sym}', str(sym))
+            lines = str(sym).split("\n")
+            lines = ["    " + line for line in lines]
+            sym = "\n".join(lines)
+            sym = self.template.replace("{sym}", str(sym))
         sym = self._to_symbol(sym)
         return sym.execute(**kwargs)
 class Convert(Expression):
-    def __init__(self, format: str = 'Python', **kwargs):
+    def __init__(self, format: str = "Python", **kwargs):
         super().__init__(**kwargs)
         self.format = format
@@ -456,13 +480,13 @@ class Map(Expression):
 class Translate(Expression):
-    def __init__(self, language: str = 'English', **kwargs):
+    def __init__(self, language: str = "English", **kwargs):
         super().__init__(**kwargs)
         self.language = language
     def forward(self, sym: Symbol, **kwargs) -> Symbol:
         sym = self._to_symbol(sym)
-        if sym.isinstanceof(f'{self.language} text'):
+        if sym.isinstanceof(f"{self.language} text"):
             return sym
         return sym.translate(language=self.language, **kwargs)
@@ -494,7 +518,7 @@ class FileWriter(Expression):
     def forward(self, sym: Symbol, **_kwargs) -> Symbol:
         sym = self._to_symbol(sym)
-        with self.path.open('w') as f:
+        with self.path.open("w") as f:
             f.write(str(sym))
@@ -502,18 +526,18 @@ class FileReader(Expression):
     @staticmethod
     def exists(path: str) -> bool:
         # remove slicing if any
-        _tmp     = path
-        _splits  = _tmp.split('[')
-        if '[' in _tmp:
+        _tmp = path
+        _splits = _tmp.split("[")
+        if "[" in _tmp:
             _tmp = _splits[0]
-        assert len(_splits) == 1 or len(_splits) == 2, 'Invalid file link format.'
-        _tmp     = Path(_tmp)
+        assert len(_splits) == 1 or len(_splits) == 2, "Invalid file link format."
+        _tmp = Path(_tmp)
         # check if file exists and is a file
         return _tmp.is_file()
     @staticmethod
     def get_files(folder_path: str, max_depth: int = 1) -> list[str]:
-        accepted_formats = ['.pdf', '.md', '.txt']
+        accepted_formats = [".pdf", ".md", ".txt"]
         folder = Path(folder_path)
         files = []
@@ -527,9 +551,34 @@ class FileReader(Expression):
     @staticmethod
     def extract_files(cmds: str) -> list[str] | None:
-        # Use the updated regular expression to match quoted and non-quoted paths
-        pattern = r'''(?:"((?:\\.|[^"\\])*)"|'((?:\\.|[^'\\])*)'|`((?:\\.|[^`\\])*)`|((?:\\ |[^ ])+))'''
-        # Use the regular expression to split and handle quoted and non-quoted paths
+        """
+        Extract file paths from a command string, handling various quoting styles.
+        This method is used by the Qdrant RAG implementation when processing document paths.
+        It uses regex to parse file paths that may be quoted in different ways.
+        Regex patterns used:
+        1. Main pattern: Matches file paths in four formats:
+           - Double-quoted: "path/to/file" (handles escaped characters)
+           - Single-quoted: 'path/to/file' (handles escaped characters)
+           - Backtick-quoted: `path/to/file` (handles escaped characters)
+           - Non-quoted: path/to/file (handles escaped spaces)
+        2. Escape removal pattern: r"\\(.)" -> r"\1"
+           - Removes backslash escape sequences from quoted paths
+           - Example: "path\\/to\\/file" -> "path/to/file"
+           - Used for double quotes, single quotes, and backticks
+        """
+        # Regex pattern to match file paths in various quoting styles
+        # Pattern breakdown:
+        # - (?:"((?:\\.|[^"\\])*)") : Matches double-quoted paths, capturing content while handling escapes
+        # - '((?:\\.|[^'\\])*)' : Matches single-quoted paths, capturing content while handling escapes
+        # - `((?:\\.|[^`\\])*)` : Matches backtick-quoted paths, capturing content while handling escapes
+        # - ((?:\\ |[^ ])+) : Matches non-quoted paths, allowing escaped spaces
+        pattern = (
+            r"""(?:"((?:\\.|[^"\\])*)"|'((?:\\.|[^'\\])*)'|`((?:\\.|[^`\\])*)`|((?:\\ |[^ ])+))"""
+        )
+        # Use regex to find all file path matches in the command string
         matches = re.findall(pattern, cmds)
         # Process the matches to handle quoted paths and normal paths
         files = []
@@ -537,23 +586,27 @@ class FileReader(Expression):
             # Each match will have 4 groups due to the pattern; only one will be non-empty
             quoted_double, quoted_single, quoted_backtick, non_quoted = match
             if quoted_double:
-                # Remove backslashes used for escaping inside double quotes
-                path = re.sub(r'\\(.)', r'\1', quoted_double)
+                # Regex substitution: Remove backslashes used for escaping inside double quotes
+                # Pattern r"\\(.)" matches a backslash followed by any character and replaces with just the character
+                # Example: "path\\/to\\/file" -> "path/to/file"
+                path = re.sub(r"\\(.)", r"\1", quoted_double)
                 file = FileReader.expand_user_path(path)
                 files.append(file)
             elif quoted_single:
-                # Remove backslashes used for escaping inside single quotes
-                path = re.sub(r'\\(.)', r'\1', quoted_single)
+                # Regex substitution: Remove backslashes used for escaping inside single quotes
+                # Same pattern as above, applied to single-quoted paths
+                path = re.sub(r"\\(.)", r"\1", quoted_single)
                 file = FileReader.expand_user_path(path)
                 files.append(file)
             elif quoted_backtick:
-                # Remove backslashes used for escaping inside backticks
-                path = re.sub(r'\\(.)', r'\1', quoted_backtick)
+                # Regex substitution: Remove backslashes used for escaping inside backticks
+                # Same pattern as above, applied to backtick-quoted paths
+                path = re.sub(r"\\(.)", r"\1", quoted_backtick)
                 file = FileReader.expand_user_path(path)
                 files.append(file)
             elif non_quoted:
-                # Replace escaped spaces with actual spaces
-                path = non_quoted.replace('\\ ', ' ')
+                # Replace escaped spaces with actual spaces (no regex needed here, simple string replace)
+                path = non_quoted.replace("\\ ", " ")
                 file = FileReader.expand_user_path(path)
                 files.append(file)
         # Filter out any files that do not exist
@@ -571,25 +624,28 @@ class FileReader(Expression):
             if FileReader.exists(file):
                 not_skipped.append(file)
             else:
-                UserMessage(f'Skipping file: {file}')
+                UserMessage(f"Skipping file: {file}")
         return not_skipped
     def forward(self, files: str | list[str], **kwargs) -> Expression:
         if isinstance(files, str):
             # Convert to list for uniform processing; more easily downstream
             files = [files]
-        if kwargs.get('run_integrity_check'):
+        if kwargs.get("run_integrity_check"):
             files = self.integrity_check(files)
         return self.sym_return_type([self.open(f, **kwargs).value for f in files])
 class FileQuery(Expression):
     def __init__(self, path: str, filter: str, **kwargs):
         super().__init__(**kwargs)
         self.path = path
         file_open = FileReader()
-        self.query_stream = Stream(Sequence(
-            IncludeFilter(filter),
-        ))
+        self.query_stream = Stream(
+            Sequence(
+                IncludeFilter(filter),
+            )
+        )
         self.file = file_open(path)
     def forward(self, sym: Symbol, **kwargs) -> Symbol:
@@ -599,42 +655,45 @@ class FileQuery(Expression):
 class Function(TrackerTraceable):
-    def __init__(self, prompt: str       = '',
-                 examples: str | None = [],
-                 pre_processors: list[PreProcessor] | None   = None,
-                 post_processors: list[PostProcessor] | None = None,
-                 default: object | None       = None,
-                 constraints: list[Callable] | None     = None,
-                 return_type: type | None     = str,
-                 sym_return_type: type | None = Symbol,
-                 origin_type: type | None     = Expression,
-                 *args, **kwargs):
+    def __init__(
+        self,
+        prompt: str = "",
+        examples: str | None = [],
+        pre_processors: list[PreProcessor] | None = None,
+        post_processors: list[PostProcessor] | None = None,
+        default: object | None = None,
+        constraints: list[Callable] | None = None,
+        return_type: type | None = str,
+        sym_return_type: type | None = Symbol,
+        origin_type: type | None = Expression,
+        *args,
+        **kwargs,
+    ):
         if constraints is None:
             constraints = []
         super().__init__(**kwargs)
-        chars       = ascii_lowercase + ascii_uppercase
-        self.name   = 'func_' + ''.join(sample(chars, 15))
-        self.args   = args
+        chars = ascii_lowercase + ascii_uppercase
+        self.name = "func_" + "".join(sample(chars, 15))
+        self.args = args
         self.kwargs = kwargs
-        self._promptTemplate     = prompt
-        self._promptFormatArgs   = []
+        self._promptTemplate = prompt
+        self._promptFormatArgs = []
         self._promptFormatKwargs = {}
-        self.examples        = Prompt(examples)
-        self.pre_processors  = pre_processors
+        self.examples = Prompt(examples)
+        self.pre_processors = pre_processors
         self.post_processors = post_processors
-        self.constraints     = constraints
-        self.default         = default
-        self.return_type     = return_type
+        self.constraints = constraints
+        self.default = default
+        self.return_type = return_type
         self.sym_return_type = sym_return_type
-        self.origin_type     = origin_type
+        self.origin_type = origin_type
     @property
     def prompt(self):
         # return a copy of the prompt template
         if len(self._promptFormatArgs) == 0 and len(self._promptFormatKwargs) == 0:
             return self._promptTemplate
-        return f"{self._promptTemplate}".format(*self._promptFormatArgs,
-                                                **self._promptFormatKwargs)
+        return f"{self._promptTemplate}".format(*self._promptFormatArgs, **self._promptFormatKwargs)
     def format(self, *args, **kwargs):
         self._promptFormatArgs = args
@@ -642,9 +701,10 @@ class Function(TrackerTraceable):
     def forward(self, *args, **kwargs) -> Expression:
         # special case for few shot function prompt definition override
-        if 'fn' in kwargs:
-            self.prompt = kwargs['fn']
-            del kwargs['fn']
+        if "fn" in kwargs:
+            self.prompt = kwargs["fn"]
+            del kwargs["fn"]
         @core.few_shot(
             *self.args,
             prompt=self.prompt,
@@ -653,19 +713,24 @@ class Function(TrackerTraceable):
             post_processors=self.post_processors,
             constraints=self.constraints,
             default=self.default,
-            **self.kwargs
+            **self.kwargs,
         )
         def _func(_, *args, **kwargs) -> self.return_type:
             pass
-        _type = type(self.name, (self.origin_type, ), {
-            # constructor
-            "forward": _func,
-            "sym_return_type": self.sym_return_type,
-            "static_context": self.static_context,
-            "dynamic_context": self.dynamic_context,
-            "__class__": self.__class__,
-            "__module__": self.__module__,
-        })
+        _type = type(
+            self.name,
+            (self.origin_type,),
+            {
+                # constructor
+                "forward": _func,
+                "sym_return_type": self.sym_return_type,
+                "static_context": self.static_context,
+                "dynamic_context": self.dynamic_context,
+                "__class__": self.__class__,
+                "__module__": self.__module__,
+            },
+        )
         obj = _type()
         return self._to_symbol(obj(*args, **kwargs))
@@ -676,7 +741,7 @@ class PrepareData(Function):
         def __call__(self, argument):
             assert argument.prop.context is not None
             instruct = argument.prop.prompt
-            context  = argument.prop.context
+            context = argument.prop.context
             return f"""{{
     'context': '{context}',
     'instruction': '{instruct}',
@@ -685,10 +750,10 @@ class PrepareData(Function):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.pre_processors  = [self.PrepareDataPreProcessor()]
-        self.constraints     = [DictFormatConstraint({ 'result': '<the data>' })]
+        self.pre_processors = [self.PrepareDataPreProcessor()]
+        self.constraints = [DictFormatConstraint({"result": "<the data>"})]
         self.post_processors = [JsonTruncateMarkdownPostProcessor()]
-        self.return_type     = dict # constraint to cast the result to a dict
+        self.return_type = dict  # constraint to cast the result to a dict
     @property
     def static_context(self):
@@ -723,7 +788,7 @@ Your goal is to prepare the data for the next task instruction. The data should
 class ExpressionBuilder(Function):
     def __init__(self, **kwargs):
-        super().__init__('Generate the code following the instructions:', **kwargs)
+        super().__init__("Generate the code following the instructions:", **kwargs)
         self.processors = ProcessorPipeline([StripPostProcessor(), CodeExtractPostProcessor()])
     def forward(self, instruct, *_args, **_kwargs):
@@ -774,10 +839,12 @@ Always produce the entire code to be executed in the same Python process. All ta
 class JsonParser(Expression):
     def __init__(self, query: str, json_: dict, **kwargs):
         super().__init__(**kwargs)
-        func = Function(prompt=JsonPromptTemplate(query, json_),
-                        constraints=[DictFormatConstraint(json_)],
-                        pre_processors=[JsonPreProcessor()],
-                        post_processors=[JsonTruncatePostProcessor()])
+        func = Function(
+            prompt=JsonPromptTemplate(query, json_),
+            constraints=[DictFormatConstraint(json_)],
+            pre_processors=[JsonPreProcessor()],
+            post_processors=[JsonTruncatePostProcessor()],
+        )
         self.fn = Try(func, retries=1)
     def forward(self, sym: Symbol, **kwargs) -> Symbol:
@@ -787,21 +854,27 @@ class JsonParser(Expression):
 class SimilarityClassification(Expression):
-    def __init__(self, classes: list[str], metric: str = 'cosine', in_memory: bool = False, **kwargs):
+    def __init__(
+        self, classes: list[str], metric: str = "cosine", in_memory: bool = False, **kwargs
+    ):
         super().__init__(**kwargs)
-        self.classes   = classes
-        self.metric    = metric
+        self.classes = classes
+        self.metric = metric
         self.in_memory = in_memory
         if self.in_memory:
-            UserMessage(f'Caching mode is enabled! It is your responsability to empty the .cache folder if you did changes to the classes. The cache is located at {HOME_PATH}/cache')
+            UserMessage(
+                f"Caching mode is enabled! It is your responsability to empty the .cache folder if you did changes to the classes. The cache is located at {HOME_PATH}/cache"
+            )
     def forward(self, x: Symbol) -> Symbol:
-        x            = self._to_symbol(x)
-        usr_embed    = x.embed()
-        embeddings   = self._dynamic_cache()
+        x = self._to_symbol(x)
+        usr_embed = x.embed()
+        embeddings = self._dynamic_cache()
         similarities = [usr_embed.similarity(emb, metric=self.metric) for emb in embeddings]
-        similarities = sorted(zip(self.classes, similarities, strict=False), key=lambda x: x[1], reverse=True)
+        similarities = sorted(
+            zip(self.classes, similarities, strict=False), key=lambda x: x[1], reverse=True
+        )
         return Symbol(similarities[0][0])
@@ -820,11 +893,7 @@ class InContextClassification(Expression):
         self.blueprint = blueprint
     def forward(self, x: Symbol, **kwargs) -> Symbol:
-        @core.few_shot(
-            prompt=x,
-            examples=self.blueprint,
-            **kwargs
-        )
+        @core.few_shot(prompt=x, examples=self.blueprint, **kwargs)
         def _func(_):
             pass
@@ -832,38 +901,38 @@ class InContextClassification(Expression):
 class Indexer(Expression):
-    DEFAULT = 'dataindex'
+    DEFAULT = "dataindex"
     @staticmethod
     def replace_special_chars(index: str):
         # replace special characters that are not for path
-        return str(index).replace('-', '').replace('_', '').replace(' ', '').lower()
+        return str(index).replace("-", "").replace("_", "").replace(" ", "").lower()
     def __init__(
-            self,
-            index_name: str = DEFAULT,
-            top_k: int = 8,
-            batch_size: int = 20,
-            formatter: Callable = _DEFAULT_PARAGRAPH_FORMATTER,
-            auto_add=False,
-            raw_result: bool = False,
-            new_dim: int = 1536,
-            **kwargs
-        ):
+        self,
+        index_name: str = DEFAULT,
+        top_k: int = 8,
+        batch_size: int = 20,
+        formatter: Callable = _DEFAULT_PARAGRAPH_FORMATTER,
+        auto_add=False,
+        raw_result: bool = False,
+        new_dim: int = 1536,
+        **kwargs,
+    ):
         super().__init__(**kwargs)
         index_name = Indexer.replace_special_chars(index_name)
         self.index_name = index_name
-        self.elements   = []
+        self.elements = []
         self.batch_size = batch_size
-        self.top_k      = top_k
-        self.retrieval  = None
-        self.formatter  = formatter
+        self.top_k = top_k
+        self.retrieval = None
+        self.formatter = formatter
         self.raw_result = raw_result
-        self.new_dim    = new_dim
+        self.new_dim = new_dim
         self.sym_return_type = Expression
         # append index name to indices.txt in home directory .symai folder (default)
-        self.path = HOME_PATH / 'indices.txt'
+        self.path = HOME_PATH / "indices.txt"
         if not self.path.exists():
             self.path.parent.mkdir(parents=True, exist_ok=True)
             self.path.touch()
@@ -874,51 +943,62 @@ class Indexer(Expression):
         # check if index already exists in indices.txt and append if not
         change = False
         with self.path.open() as f:
-            indices = f.read().split('\n')
+            indices = f.read().split("\n")
             # filter out empty strings
             indices = [i for i in indices if i]
         if self.index_name not in indices:
-                indices.append(self.index_name)
-                change = True
+            indices.append(self.index_name)
+            change = True
         if change:
-            with self.path.open('w') as f:
-                f.write('\n'.join(indices))
+            with self.path.open("w") as f:
+                f.write("\n".join(indices))
     def exists(self) -> bool:
         # check if index exists in home directory .symai folder (default) indices.txt
-        path = HOME_PATH / 'indices.txt'
+        path = HOME_PATH / "indices.txt"
         if not path.exists():
             return False
         with path.open() as f:
-            indices = f.read().split('\n')
+            indices = f.read().split("\n")
             if self.index_name in indices:
                 return True
         return False
     def forward(
-            self,
-            data: Symbol | None = None,
-            _raw_result: bool = False,
-        ) -> Symbol:
+        self,
+        data: Symbol | None = None,
+        _raw_result: bool = False,
+    ) -> Symbol:
         that = self
         if data is not None:
             data = self._to_symbol(data)
             self.elements = self.formatter(data).value
             # run over the elments in batches
             for i in tqdm(range(0, len(self.elements), self.batch_size)):
-                val = Symbol(self.elements[i:i+self.batch_size]).zip(new_dim=self.new_dim)
+                val = Symbol(self.elements[i : i + self.batch_size]).zip(new_dim=self.new_dim)
                 that.add(val, index_name=that.index_name, index_dims=that.new_dim)
             # we save the index
             that.config(None, save=True, index_name=that.index_name, index_dims=that.new_dim)
-        def _func(query, *_args, **kwargs) -> Union[Symbol, 'VectorDBResult']:
-            raw_result = kwargs.get('raw_result') or that.raw_result
+        def _func(query, *_args, **kwargs) -> Union[Symbol, "VectorDBResult"]:
+            raw_result = kwargs.get("raw_result") or that.raw_result
             query_emb = Symbol(query).embed(new_dim=that.new_dim).value
-            res = that.get(query_emb, index_name=that.index_name, index_top_k=that.top_k, ori_query=query, index_dims=that.new_dim, **kwargs)
+            res = that.get(
+                query_emb,
+                index_name=that.index_name,
+                index_top_k=that.top_k,
+                ori_query=query,
+                index_dims=that.new_dim,
+                **kwargs,
+            )
             that.retrieval = res
             if raw_result:
                 return res
-            return Symbol(res).query(prompt='From the retrieved data, select the most relevant information.', context=query)
+            return Symbol(res).query(
+                prompt="From the retrieved data, select the most relevant information.",
+                context=query,
+            )
         return _func
@@ -930,7 +1010,7 @@ class PrimitiveDisabler(Expression):
     def __enter__(self):
         # Import Symbol lazily so components does not clash with symbol during load.
-        from .symbol import Symbol # noqa
+        from .symbol import Symbol  # noqa
         frame = inspect.currentframe()
         f_locals = frame.f_back.f_locals
@@ -957,7 +1037,7 @@ class PrimitiveDisabler(Expression):
         for sym in self._symbols.values():
             for primitive in sym._primitives:
                 for method, _ in inspect.getmembers(primitive, predicate=inspect.isfunction):
-                    if method in self._primitives or method.startswith('_'):
+                    if method in self._primitives or method.startswith("_"):
                         continue
                     self._primitives.add(method)
@@ -1002,9 +1082,7 @@ class FunctionWithUsage(Function):
         self.total_tokens += usage.total_tokens
     def get_usage(self):
-        return self._format_usage(
-            self.prompt_tokens, self.completion_tokens, self.total_tokens
-        )
+        return self._format_usage(self.prompt_tokens, self.completion_tokens, self.total_tokens)
     def forward(self, *args, **kwargs):
         if "return_metadata" not in kwargs:
@@ -1015,9 +1093,7 @@ class FunctionWithUsage(Function):
         raw_output = metadata.get("raw_output")
         if hasattr(raw_output, "usage"):
             usage = raw_output.usage
-            prompt_tokens = (
-                usage.prompt_tokens if hasattr(usage, "prompt_tokens") else 0
-            )
+            prompt_tokens = usage.prompt_tokens if hasattr(usage, "prompt_tokens") else 0
             completion_tokens = (
                 usage.completion_tokens if hasattr(usage, "completion_tokens") else 0
             )
@@ -1033,7 +1109,9 @@ class FunctionWithUsage(Function):
             self.total_tokens += total_tokens
         else:
             if self.missing_usage_exception and "preview" not in kwargs:
-                UserMessage("Missing usage in metadata of neursymbolic engine", raise_with=Exception)
+                UserMessage(
+                    "Missing usage in metadata of neursymbolic engine", raise_with=Exception
+                )
             prompt_tokens = 0
             completion_tokens = 0
             total_tokens = 0
@@ -1042,12 +1120,12 @@ class FunctionWithUsage(Function):
 class SelfPrompt(Expression):
-    _default_retry_tries     = 20
-    _default_retry_delay     = 0.5
+    _default_retry_tries = 20
+    _default_retry_delay = 0.5
     _default_retry_max_delay = -1
-    _default_retry_backoff   = 1
-    _default_retry_jitter    = 0
-    _default_retry_graceful  = True
+    _default_retry_backoff = 1
+    _default_retry_jitter = 0
+    _default_retry_graceful = True
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -1061,14 +1139,21 @@ class SelfPrompt(Expression):
         :return: A dictionary containing the new prompts in the same format:
                  {'user': '...', 'system': '...'}
         """
-        tries     = kwargs.get('tries', self._default_retry_tries)
-        delay     = kwargs.get('delay', self._default_retry_delay)
-        max_delay = kwargs.get('max_delay', self._default_retry_max_delay)
-        backoff   = kwargs.get('backoff', self._default_retry_backoff)
-        jitter    = kwargs.get('jitter', self._default_retry_jitter)
-        graceful  = kwargs.get('graceful', self._default_retry_graceful)
-        @core_ext.retry(tries=tries, delay=delay, max_delay=max_delay, backoff=backoff, jitter=jitter, graceful=graceful)
+        tries = kwargs.get("tries", self._default_retry_tries)
+        delay = kwargs.get("delay", self._default_retry_delay)
+        max_delay = kwargs.get("max_delay", self._default_retry_max_delay)
+        backoff = kwargs.get("backoff", self._default_retry_backoff)
+        jitter = kwargs.get("jitter", self._default_retry_jitter)
+        graceful = kwargs.get("graceful", self._default_retry_graceful)
+        @core_ext.retry(
+            tries=tries,
+            delay=delay,
+            max_delay=max_delay,
+            backoff=backoff,
+            jitter=jitter,
+            graceful=graceful,
+        )
         @core.zero_shot(
             prompt=(
                 "Based on the following prompt, generate a new system (or developer) prompt and a new user prompt. "
@@ -1077,18 +1162,19 @@ class SelfPrompt(Expression):
                 "The new user prompt should contain the user's requirements. "
                 "Check if the input contains a 'system' or 'developer' key and use the same key in your output. "
                 "Only output the new prompts in JSON format as shown:\n\n"
-                "{\"system\": \"<new system prompt>\", \"user\": \"<new user prompt>\"}\n\n"
+                '{"system": "<new system prompt>", "user": "<new user prompt>"}\n\n'
                 "OR\n\n"
-                "{\"developer\": \"<new developer prompt>\", \"user\": \"<new user prompt>\"}\n\n"
+                '{"developer": "<new developer prompt>", "user": "<new user prompt>"}\n\n'
                 "Maintain the same key structure as in the input prompt. Do not include any additional text."
             ),
             response_format={"type": "json_object"},
             post_processors=[
                 lambda res, _: json.loads(res),
             ],
-            **kwargs
+            **kwargs,
         )
-        def _func(self, sym: Symbol): pass
+        def _func(self, sym: Symbol):
+            pass
         return _func(self, self._to_symbol(existing_prompt))
@@ -1104,15 +1190,19 @@ class MetadataTracker(Expression):
     def __str__(self, value=None):
         value = value or self.metadata
         if isinstance(value, dict):
-            return '{\n\t' + ', \n\t'.join(f'"{k}": {self.__str__(v)}' for k,v in value.items()) + '\n}'
+            return (
+                "{\n\t"
+                + ", \n\t".join(f'"{k}": {self.__str__(v)}' for k, v in value.items())
+                + "\n}"
+            )
         if isinstance(value, list):
-            return '[' + ', '.join(self.__str__(item) for item in value) + ']'
+            return "[" + ", ".join(self.__str__(item) for item in value) + "]"
         if isinstance(value, str):
             return f'"{value}"'
         return f"\n\t    {value}"
     def __new__(cls, *_args, **_kwargs):
-        cls._lock = getattr(cls, '_lock', Lock())
+        cls._lock = getattr(cls, "_lock", Lock())
         with cls._lock:
             instance = super().__new__(cls)
             instance._metadata = {}
@@ -1135,14 +1225,14 @@ class MetadataTracker(Expression):
             return None
         if (
-            event == 'return'
-            and frame.f_code.co_name == 'forward'
-            and 'self' in frame.f_locals
-            and isinstance(frame.f_locals['self'], Engine)
+            event == "return"
+            and frame.f_code.co_name == "forward"
+            and "self" in frame.f_locals
+            and isinstance(frame.f_locals["self"], Engine)
         ):
             _, metadata = arg  # arg contains return value on 'return' event
-            engine_name = frame.f_locals['self'].__class__.__name__
-            model_name = frame.f_locals['self'].model
+            engine_name = frame.f_locals["self"].__class__.__name__
+            model_name = frame.f_locals["self"].model
             self._metadata[(self._metadata_id, engine_name, model_name)] = metadata
             self._metadata_id += 1
@@ -1162,38 +1252,91 @@ class MetadataTracker(Expression):
             try:
                 if engine_name == "GroqEngine":
                     usage = metadata["raw_output"].usage
-                    token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += usage.completion_tokens
-                    token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += usage.prompt_tokens
-                    token_details[(engine_name, model_name)]["usage"]["total_tokens"] += usage.total_tokens
+                    token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += (
+                        usage.completion_tokens
+                    )
+                    token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += (
+                        usage.prompt_tokens
+                    )
+                    token_details[(engine_name, model_name)]["usage"]["total_tokens"] += (
+                        usage.total_tokens
+                    )
                     token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
                     #!: Backward compatibility for components like `RuntimeInfo`
-                    token_details[(engine_name, model_name)]["prompt_breakdown"]["cached_tokens"] += 0 # Assignment not allowed with defualtdict
-                    token_details[(engine_name, model_name)]["completion_breakdown"]["reasoning_tokens"] += 0
+                    token_details[(engine_name, model_name)]["prompt_breakdown"][
+                        "cached_tokens"
+                    ] += 0  # Assignment not allowed with defualtdict
+                    token_details[(engine_name, model_name)]["completion_breakdown"][
+                        "reasoning_tokens"
+                    ] += 0
+                elif engine_name == "ParallelEngine":
+                    token_details[(engine_name, None)]["usage"]["total_calls"] += 1
+                    # There are no model-specific tokens for this engine
+                    token_details[(engine_name, None)]["usage"]["completion_tokens"] += 0
+                    token_details[(engine_name, None)]["usage"]["prompt_tokens"] += 0
+                    token_details[(engine_name, None)]["usage"]["total_tokens"] += 0
+                    #!: Backward compatibility for components like `RuntimeInfo`
+                    token_details[(engine_name, None)]["prompt_breakdown"]["cached_tokens"] += (
+                        0  # Assignment not allowed with defualtdict
+                    )
+                    token_details[(engine_name, None)]["completion_breakdown"][
+                        "reasoning_tokens"
+                    ] += 0
                 elif engine_name in ("GPTXChatEngine", "GPTXReasoningEngine"):
                     usage = metadata["raw_output"].usage
-                    token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += usage.completion_tokens
-                    token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += usage.prompt_tokens
-                    token_details[(engine_name, model_name)]["usage"]["total_tokens"] += usage.total_tokens
+                    token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += (
+                        usage.completion_tokens
+                    )
+                    token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += (
+                        usage.prompt_tokens
+                    )
+                    token_details[(engine_name, model_name)]["usage"]["total_tokens"] += (
+                        usage.total_tokens
+                    )
                     token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
-                    token_details[(engine_name, model_name)]["completion_breakdown"]["accepted_prediction_tokens"] += usage.completion_tokens_details.accepted_prediction_tokens
-                    token_details[(engine_name, model_name)]["completion_breakdown"]["rejected_prediction_tokens"] += usage.completion_tokens_details.rejected_prediction_tokens
-                    token_details[(engine_name, model_name)]["completion_breakdown"]["audio_tokens"] += usage.completion_tokens_details.audio_tokens
-                    token_details[(engine_name, model_name)]["completion_breakdown"]["reasoning_tokens"] += usage.completion_tokens_details.reasoning_tokens
-                    token_details[(engine_name, model_name)]["prompt_breakdown"]["audio_tokens"] += usage.prompt_tokens_details.audio_tokens
-                    token_details[(engine_name, model_name)]["prompt_breakdown"]["cached_tokens"] += usage.prompt_tokens_details.cached_tokens
+                    token_details[(engine_name, model_name)]["completion_breakdown"][
+                        "accepted_prediction_tokens"
+                    ] += usage.completion_tokens_details.accepted_prediction_tokens
+                    token_details[(engine_name, model_name)]["completion_breakdown"][
+                        "rejected_prediction_tokens"
+                    ] += usage.completion_tokens_details.rejected_prediction_tokens
+                    token_details[(engine_name, model_name)]["completion_breakdown"][
+                        "audio_tokens"
+                    ] += usage.completion_tokens_details.audio_tokens
+                    token_details[(engine_name, model_name)]["completion_breakdown"][
+                        "reasoning_tokens"
+                    ] += usage.completion_tokens_details.reasoning_tokens
+                    token_details[(engine_name, model_name)]["prompt_breakdown"][
+                        "audio_tokens"
+                    ] += usage.prompt_tokens_details.audio_tokens
+                    token_details[(engine_name, model_name)]["prompt_breakdown"][
+                        "cached_tokens"
+                    ] += usage.prompt_tokens_details.cached_tokens
                 elif engine_name == "GPTXSearchEngine":
                     usage = metadata["raw_output"].usage
-                    token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += usage.input_tokens
-                    token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += usage.output_tokens
-                    token_details[(engine_name, model_name)]["usage"]["total_tokens"] += usage.total_tokens
+                    token_details[(engine_name, model_name)]["usage"]["prompt_tokens"] += (
+                        usage.input_tokens
+                    )
+                    token_details[(engine_name, model_name)]["usage"]["completion_tokens"] += (
+                        usage.output_tokens
+                    )
+                    token_details[(engine_name, model_name)]["usage"]["total_tokens"] += (
+                        usage.total_tokens
+                    )
                     token_details[(engine_name, model_name)]["usage"]["total_calls"] += 1
-                    token_details[(engine_name, model_name)]["prompt_breakdown"]["cached_tokens"] += usage.input_tokens_details.cached_tokens
-                    token_details[(engine_name, model_name)]["completion_breakdown"]["reasoning_tokens"] += usage.output_tokens_details.reasoning_tokens
+                    token_details[(engine_name, model_name)]["prompt_breakdown"][
+                        "cached_tokens"
+                    ] += usage.input_tokens_details.cached_tokens
+                    token_details[(engine_name, model_name)]["completion_breakdown"][
+                        "reasoning_tokens"
+                    ] += usage.output_tokens_details.reasoning_tokens
                 else:
                     logger.warning(f"Tracking {engine_name} is not supported.")
                     continue
             except Exception as e:
-                UserMessage(f"Failed to parse metadata for {engine_name}: {e}", raise_with=AttributeError)
+                UserMessage(
+                    f"Failed to parse metadata for {engine_name}: {e}", raise_with=AttributeError
+                )
         # Convert to normal dict
         return {**token_details}
@@ -1203,22 +1346,24 @@ class MetadataTracker(Expression):
         return engine_name in supported_engines
     def _accumulate_time_field(self, accumulated: dict, metadata: dict) -> None:
-        if 'time' in metadata and 'time' in accumulated:
-            accumulated['time'] += metadata['time']
+        if "time" in metadata and "time" in accumulated:
+            accumulated["time"] += metadata["time"]
     def _accumulate_usage_fields(self, accumulated: dict, metadata: dict) -> None:
-        if 'raw_output' not in metadata or 'raw_output' not in accumulated:
+        if "raw_output" not in metadata or "raw_output" not in accumulated:
             return
-        metadata_raw_output = metadata['raw_output']
-        accumulated_raw_output = accumulated['raw_output']
-        if not hasattr(metadata_raw_output, 'usage') or not hasattr(accumulated_raw_output, 'usage'):
+        metadata_raw_output = metadata["raw_output"]
+        accumulated_raw_output = accumulated["raw_output"]
+        if not hasattr(metadata_raw_output, "usage") or not hasattr(
+            accumulated_raw_output, "usage"
+        ):
             return
         current_usage = metadata_raw_output.usage
         accumulated_usage = accumulated_raw_output.usage
-        for attr in ['completion_tokens', 'prompt_tokens', 'total_tokens']:
+        for attr in ["completion_tokens", "prompt_tokens", "total_tokens"]:
             if hasattr(current_usage, attr) and hasattr(accumulated_usage, attr):
                 setattr(
                     accumulated_usage,
@@ -1226,20 +1371,24 @@ class MetadataTracker(Expression):
                     getattr(accumulated_usage, attr) + getattr(current_usage, attr),
                 )
-        for detail_attr in ['completion_tokens_details', 'prompt_tokens_details']:
-            if not hasattr(current_usage, detail_attr) or not hasattr(accumulated_usage, detail_attr):
+        for detail_attr in ["completion_tokens_details", "prompt_tokens_details"]:
+            if not hasattr(current_usage, detail_attr) or not hasattr(
+                accumulated_usage, detail_attr
+            ):
                 continue
             current_details = getattr(current_usage, detail_attr)
             accumulated_details = getattr(accumulated_usage, detail_attr)
             for attr in dir(current_details):
-                if attr.startswith('_') or not hasattr(accumulated_details, attr):
+                if attr.startswith("_") or not hasattr(accumulated_details, attr):
                     continue
                 current_val = getattr(current_details, attr)
                 accumulated_val = getattr(accumulated_details, attr)
-                if isinstance(current_val, (int, float)) and isinstance(accumulated_val, (int, float)):
+                if isinstance(current_val, (int, float)) and isinstance(
+                    accumulated_val, (int, float)
+                ):
                     setattr(accumulated_details, attr, accumulated_val + current_val)
     def _accumulate_metadata(self):
@@ -1255,7 +1404,9 @@ class MetadataTracker(Expression):
         # Skipz first entry
         for (_, engine_name), metadata in list(self._metadata.items())[1:]:
             if not self._can_accumulate_engine(engine_name):
-                logger.warning(f"Metadata accumulation for {engine_name} is not supported. Try `.usage` instead for now.")
+                logger.warning(
+                    f"Metadata accumulation for {engine_name} is not supported. Try `.usage` instead for now."
+                )
                 continue
             self._accumulate_time_field(accumulated, metadata)
@@ -1278,6 +1429,7 @@ class MetadataTracker(Expression):
 class DynamicEngine(Expression):
     """Context manager for dynamically switching neurosymbolic engine models."""
     def __init__(self, model: str, api_key: str, _debug: bool = False, **_kwargs):
         super().__init__()
         self.model = model
@@ -1288,7 +1440,7 @@ class DynamicEngine(Expression):
         self._ctx_token = None
     def __new__(cls, *_args, **_kwargs):
-        cls._lock = getattr(cls, '_lock', Lock())
+        cls._lock = getattr(cls, "_lock", Lock())
         with cls._lock:
             instance = super().__new__(cls)
             instance._metadata = {}
@@ -1322,11 +1474,177 @@ class DynamicEngine(Expression):
     def _create_engine_instance(self):
         """Create an engine instance based on the model name."""
         # Deferred to avoid components <-> neurosymbolic engine circular imports.
-        from .backend.engines.neurosymbolic import ENGINE_MAPPING # noqa
+        from .backend.engines.neurosymbolic import ENGINE_MAPPING  # noqa
         try:
             engine_class = ENGINE_MAPPING.get(self.model)
             if engine_class is None:
                 UserMessage(f"Unsupported model '{self.model}'", raise_with=ValueError)
             return engine_class(api_key=self.api_key, model=self.model)
         except Exception as e:
-            UserMessage(f"Failed to create engine for model '{self.model}': {e!s}", raise_with=ValueError)
+            UserMessage(
+                f"Failed to create engine for model '{self.model}': {e!s}", raise_with=ValueError
+            )
+# Chonkie chunker imports - lazy loaded
+# All three globals start as None ("import not attempted yet") and are filled in
+# by _lazy_import_chonkie() the first time it runs.
+# Name -> imported object for the chonkie/tokenizers symbols ({} if the import failed).
+_CHONKIE_MODULES = None
+# Chunker class name -> chunker class ({} if the import failed).
+_CHUNKER_MAPPING = None
+# True or False once the import has been attempted.
+_CHONKIE_AVAILABLE = None
+def _lazy_import_chonkie():
+    """Import chonkie and tokenizers on first use and cache the outcome in module globals.
+    Returns the name -> object dict of the imported symbols. The dict is empty when
+    any of the imports fails. Later calls return the cached dict without re-importing.
+    """
+    global _CHONKIE_MODULES, _CHUNKER_MAPPING, _CHONKIE_AVAILABLE
+    if _CHONKIE_MODULES is None:
+        try:
+            from chonkie import (  # noqa
+                CodeChunker,
+                LateChunker,
+                NeuralChunker,
+                RecursiveChunker,
+                SemanticChunker,
+                SentenceChunker,
+                SlumberChunker,
+                TableChunker,
+                TokenChunker,
+            )
+            from chonkie.embeddings.base import BaseEmbeddings  # noqa
+            from tokenizers import Tokenizer  # noqa
+        except ImportError:
+            # Cache the failure as well, so the import is not retried on every call.
+            _CHONKIE_MODULES, _CHUNKER_MAPPING, _CHONKIE_AVAILABLE = {}, {}, False
+        else:
+            # Selectable chunkers, keyed by the name callers pass in.
+            chunker_classes = {
+                "TokenChunker": TokenChunker,
+                "SentenceChunker": SentenceChunker,
+                "RecursiveChunker": RecursiveChunker,
+                "SemanticChunker": SemanticChunker,
+                "CodeChunker": CodeChunker,
+                "LateChunker": LateChunker,
+                "NeuralChunker": NeuralChunker,
+                "SlumberChunker": SlumberChunker,
+                "TableChunker": TableChunker,
+            }
+            # Every imported symbol: the chunkers in alphabetical order, then the helpers.
+            loaded = {name: chunker_classes[name] for name in sorted(chunker_classes)}
+            loaded["BaseEmbeddings"] = BaseEmbeddings
+            loaded["Tokenizer"] = Tokenizer
+            _CHONKIE_MODULES = loaded
+            _CHUNKER_MAPPING = chunker_classes
+            _CHONKIE_AVAILABLE = True
+    return _CHONKIE_MODULES
+def _get_chunker_mapping():
+    """Return the chunker name -> class mapping, triggering the lazy import if it has not run.
+    An empty dict is returned when no chunkers are available.
+    """
+    mapping = _CHUNKER_MAPPING
+    if mapping is None:
+        _lazy_import_chonkie()
+        # Re-read the global: the import call above is what populates it.
+        mapping = _CHUNKER_MAPPING
+    return mapping if mapping else {}
+def _is_chonkie_available():
+    """Report whether the chonkie imports succeeded, triggering the lazy import if it has not run."""
+    available = _CHONKIE_AVAILABLE
+    if available is None:
+        _lazy_import_chonkie()
+        # Re-read the global: the import call above is what sets it.
+        available = _CHONKIE_AVAILABLE
+    return available if available else False
+@beartype
+class ChonkieChunker(Expression):
+    """Expression that splits text into cleaned chunks using a chonkie chunker chosen by name.
+    `tokenizer_name` feeds the tokenizer-driven chunkers and `embedding_model_name`
+    the embedding-driven ones; `_resolve_chunker` decides which one applies.
+    """
+    def __init__(
+        self,
+        tokenizer_name: str | None = "gpt2",
+        embedding_model_name: str | None = "minishlab/potion-base-8M",
+        **symai_kwargs,
+    ):
+        """Store the tokenizer and embedding model names; other kwargs go to `Expression`."""
+        super().__init__(**symai_kwargs)
+        self.embedding_model_name = embedding_model_name
+        self.tokenizer_name = tokenizer_name
+    def forward(
+        self, data: Symbol, chunker_name: str | None = "RecursiveChunker", **chunker_kwargs
+    ) -> Symbol:
+        """Chunk `data.value` with the named chunker and return the cleaned chunk texts.
+        `chunker_kwargs` are forwarded to the chunker constructor. An ImportError is
+        raised through `UserMessage` when chonkie could not be imported.
+        """
+        if not _is_chonkie_available():
+            UserMessage(
+                "chonkie library is not installed. Please install it with `pip install chonkie tokenizers`.",
+                raise_with=ImportError,
+            )
+        # NOTE(review): `chunker_name` may be None here, but `_resolve_chunker` annotates
+        # it as `str` -- confirm how the class-level @beartype treats a None value.
+        split = self._resolve_chunker(chunker_name, **chunker_kwargs)
+        cleaned = []
+        for piece in split(data.value):
+            cleaned.append(ChonkieChunker.clean_text(piece.text))
+        return self._to_symbol(cleaned)
+    def _resolve_chunker(self, chunker_name: str, **chunker_kwargs):
+        """Look up the chunker class for `chunker_name` and instantiate it.
+        Raises ValueError when the name is unknown or has no construction rule below.
+        """
+        available = _get_chunker_mapping()
+        if chunker_name not in available:
+            message = (
+                f"Chunker {chunker_name} not found. Available chunkers: {list(available.keys())}. "
+                f"See docs (https://docs.chonkie.ai/getting-started/introduction) for more info."
+            )
+            raise ValueError(message)
+        chunker_class = available[chunker_name]
+        Tokenizer = _lazy_import_chonkie().get("Tokenizer")
+        # Chunkers that receive a loaded Tokenizer object as their first argument.
+        if chunker_name in ("TokenChunker", "SentenceChunker", "RecursiveChunker"):
+            if Tokenizer is None:
+                UserMessage(
+                    "Tokenizers library is not installed. Please install it with `pip install tokenizers`.",
+                    raise_with=ImportError,
+                )
+            return chunker_class(Tokenizer.from_pretrained(self.tokenizer_name), **chunker_kwargs)
+        # Chunkers driven by an embedding model, passed by name.
+        if chunker_name in ("SemanticChunker", "LateChunker"):
+            return chunker_class(embedding_model=self.embedding_model_name, **chunker_kwargs)
+        # Chunkers that take `tokenizer` as a keyword; a caller-supplied value wins,
+        # otherwise the configured tokenizer name is passed as a string.
+        if chunker_name in ("CodeChunker", "TableChunker", "SlumberChunker"):
+            chunker_kwargs.setdefault("tokenizer", self.tokenizer_name)
+            return chunker_class(**chunker_kwargs)
+        # NeuralChunker is built from the caller's kwargs only.
+        if chunker_name == "NeuralChunker":
+            return chunker_class(**chunker_kwargs)
+        # Reached only for a mapped chunker that none of the rules above cover.
+        message = (
+            f"Chunker {chunker_name} not properly configured. "
+            f"Available chunkers: {list(available.keys())}."
+        )
+        raise ValueError(message)
+    @staticmethod
+    def clean_text(text: str) -> str:
+        """Return `text` without NUL characters and without characters UTF-8 cannot encode."""
+        without_nulls = text.replace("\x00", "")
+        # errors="ignore" drops unencodable characters instead of raising or replacing them.
+        encoded = without_nulls.encode("utf-8", errors="ignore")
+        return encoded.decode("utf-8")

symbolicai 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

symbolicai 1.0.0py3-none-any.whl → 1.1.0py3-none-any.whl