symbolicai 0.20.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- symai/__init__.py +96 -64
- symai/backend/base.py +93 -80
- symai/backend/engines/drawing/engine_bfl.py +12 -11
- symai/backend/engines/drawing/engine_gpt_image.py +108 -87
- symai/backend/engines/embedding/engine_llama_cpp.py +25 -28
- symai/backend/engines/embedding/engine_openai.py +3 -5
- symai/backend/engines/execute/engine_python.py +6 -5
- symai/backend/engines/files/engine_io.py +74 -67
- symai/backend/engines/imagecaptioning/engine_blip2.py +3 -3
- symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +54 -38
- symai/backend/engines/index/engine_pinecone.py +23 -24
- symai/backend/engines/index/engine_vectordb.py +16 -14
- symai/backend/engines/lean/engine_lean4.py +38 -34
- symai/backend/engines/neurosymbolic/__init__.py +41 -13
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +262 -182
- symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +263 -191
- symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +53 -49
- symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +212 -211
- symai/backend/engines/neurosymbolic/engine_groq.py +87 -63
- symai/backend/engines/neurosymbolic/engine_huggingface.py +21 -24
- symai/backend/engines/neurosymbolic/engine_llama_cpp.py +117 -48
- symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +256 -229
- symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +270 -150
- symai/backend/engines/ocr/engine_apilayer.py +6 -8
- symai/backend/engines/output/engine_stdout.py +1 -4
- symai/backend/engines/search/engine_openai.py +7 -7
- symai/backend/engines/search/engine_perplexity.py +5 -5
- symai/backend/engines/search/engine_serpapi.py +12 -14
- symai/backend/engines/speech_to_text/engine_local_whisper.py +20 -27
- symai/backend/engines/symbolic/engine_wolframalpha.py +3 -3
- symai/backend/engines/text_to_speech/engine_openai.py +5 -7
- symai/backend/engines/text_vision/engine_clip.py +7 -11
- symai/backend/engines/userinput/engine_console.py +3 -3
- symai/backend/engines/webscraping/engine_requests.py +81 -48
- symai/backend/mixin/__init__.py +13 -0
- symai/backend/mixin/anthropic.py +4 -2
- symai/backend/mixin/deepseek.py +2 -0
- symai/backend/mixin/google.py +2 -0
- symai/backend/mixin/openai.py +11 -3
- symai/backend/settings.py +83 -16
- symai/chat.py +101 -78
- symai/collect/__init__.py +7 -1
- symai/collect/dynamic.py +77 -69
- symai/collect/pipeline.py +35 -27
- symai/collect/stats.py +75 -63
- symai/components.py +198 -169
- symai/constraints.py +15 -12
- symai/core.py +698 -359
- symai/core_ext.py +32 -34
- symai/endpoints/api.py +80 -73
- symai/extended/.DS_Store +0 -0
- symai/extended/__init__.py +46 -12
- symai/extended/api_builder.py +11 -8
- symai/extended/arxiv_pdf_parser.py +13 -12
- symai/extended/bibtex_parser.py +2 -3
- symai/extended/conversation.py +101 -90
- symai/extended/document.py +17 -10
- symai/extended/file_merger.py +18 -13
- symai/extended/graph.py +18 -13
- symai/extended/html_style_template.py +2 -4
- symai/extended/interfaces/blip_2.py +1 -2
- symai/extended/interfaces/clip.py +1 -2
- symai/extended/interfaces/console.py +7 -1
- symai/extended/interfaces/dall_e.py +1 -1
- symai/extended/interfaces/flux.py +1 -1
- symai/extended/interfaces/gpt_image.py +1 -1
- symai/extended/interfaces/input.py +1 -1
- symai/extended/interfaces/llava.py +0 -1
- symai/extended/interfaces/naive_vectordb.py +7 -8
- symai/extended/interfaces/naive_webscraping.py +1 -1
- symai/extended/interfaces/ocr.py +1 -1
- symai/extended/interfaces/pinecone.py +6 -5
- symai/extended/interfaces/serpapi.py +1 -1
- symai/extended/interfaces/terminal.py +2 -3
- symai/extended/interfaces/tts.py +1 -1
- symai/extended/interfaces/whisper.py +1 -1
- symai/extended/interfaces/wolframalpha.py +1 -1
- symai/extended/metrics/__init__.py +11 -1
- symai/extended/metrics/similarity.py +11 -13
- symai/extended/os_command.py +17 -16
- symai/extended/packages/__init__.py +29 -3
- symai/extended/packages/symdev.py +19 -16
- symai/extended/packages/sympkg.py +12 -9
- symai/extended/packages/symrun.py +21 -19
- symai/extended/repo_cloner.py +11 -10
- symai/extended/seo_query_optimizer.py +1 -2
- symai/extended/solver.py +20 -23
- symai/extended/summarizer.py +4 -3
- symai/extended/taypan_interpreter.py +10 -12
- symai/extended/vectordb.py +99 -82
- symai/formatter/__init__.py +9 -1
- symai/formatter/formatter.py +12 -16
- symai/formatter/regex.py +62 -63
- symai/functional.py +176 -122
- symai/imports.py +136 -127
- symai/interfaces.py +56 -27
- symai/memory.py +14 -13
- symai/misc/console.py +49 -39
- symai/misc/loader.py +5 -3
- symai/models/__init__.py +17 -1
- symai/models/base.py +269 -181
- symai/models/errors.py +0 -1
- symai/ops/__init__.py +32 -22
- symai/ops/measures.py +11 -15
- symai/ops/primitives.py +348 -228
- symai/post_processors.py +32 -28
- symai/pre_processors.py +39 -41
- symai/processor.py +6 -4
- symai/prompts.py +59 -45
- symai/server/huggingface_server.py +23 -20
- symai/server/llama_cpp_server.py +7 -5
- symai/shell.py +3 -4
- symai/shellsv.py +499 -375
- symai/strategy.py +517 -287
- symai/symbol.py +111 -116
- symai/utils.py +42 -36
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/METADATA +4 -2
- symbolicai-1.0.0.dist-info/RECORD +163 -0
- symbolicai-0.20.2.dist-info/RECORD +0 -162
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/WHEEL +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/entry_points.txt +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {symbolicai-0.20.2.dist-info → symbolicai-1.0.0.dist-info}/top_level.txt +0 -0
symai/backend/engines/files/engine_io.py:

```diff
@@ -1,5 +1,5 @@
+import contextlib
 import logging
-import os
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -7,21 +7,59 @@ import pypdf
 import tika
 from tika import unpack
 
+from ....utils import UserMessage
 from ...base import Engine
 
 # Initialize Tika lazily to avoid spawning JVMs prematurely for all workers
-
+_TIKA_STATE = {"initialized": False}
 
 def _ensure_tika_vm():
-
-
-        try:
+    if not _TIKA_STATE["initialized"]:
+        with contextlib.suppress(Exception):
             tika.initVM()
-        except Exception:
-            # If initVM fails, we still attempt unpack.from_file which may auto-init
-            pass
         logging.getLogger('tika').setLevel(logging.CRITICAL)
-
+        _TIKA_STATE["initialized"] = True
+
+
+def _int_or_none(value):
+    return int(value) if value != '' else None
+
+
+def _parse_slice_token(token):
+    if ':' not in token:
+        return int(token)
+    parts = token.split(':')
+    if len(parts) == 2:
+        start, end = parts
+        return slice(_int_or_none(start), _int_or_none(end), None)
+    if len(parts) == 3:
+        start, end, step = parts
+        return slice(_int_or_none(start), _int_or_none(end), _int_or_none(step))
+    return None
+
+
+def _parse_slice_spec(file_path):
+    if '[' not in file_path or ']' not in file_path:
+        return file_path, None
+    path_part, remainder = file_path.split('[', 1)
+    slice_section = remainder.split(']', 1)[0]
+    slices = []
+    for token in slice_section.split(','):
+        if token == '':
+            continue
+        parsed = _parse_slice_token(token)
+        if parsed is not None:
+            slices.append(parsed)
+    return path_part, slices or None
+
+
+def _apply_slices(lines, slices_):
+    if slices_ is None:
+        return lines
+    new_content = []
+    for slice_item in slices_:
+        new_content.extend(lines[slice_item])
+    return new_content
 
 
 @dataclass
```
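The inline slice-parsing loop from 0.20.2 is factored into the three helpers above. A minimal sketch of how a bracketed slice spec now resolves, using only the helpers exactly as they appear in this hunk:

```python
# 'notes.txt[0:10:2,15:20]' -> clean path plus ordered Python slice objects.
path, slices_ = _parse_slice_spec('notes.txt[0:10:2,15:20]')
assert path == 'notes.txt'
assert slices_ == [slice(0, 10, 2), slice(15, 20, None)]

# _apply_slices concatenates the selections in order:
# lines 0, 2, 4, 6, 8 followed by lines 15..19.
lines = [f'line {i}' for i in range(30)]
selected = _apply_slices(lines, slices_)
assert selected[0] == 'line 0' and selected[-1] == 'line 19'
```

The token grammar mirrors Python's own slice syntax, so `start`, `end`, and `step` may each be omitted.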
```diff
@@ -42,83 +80,52 @@ class FileEngine(Engine):
     def _read_slice_file(self, file_path, argument):
         # check if file is empty
         with_metadata = argument.kwargs.get('with_metadata', False)
-
+        file_id = Path(argument.prop.prepared_input).stem.replace(' ', '_')
         if file_path is None or file_path.strip() == '':
             return None
 
         # check if file slice is used
-        slices_ =
-
-
-            file_path = file_parts[0]
-            # remove string up to '[' and after ']'
-            slices_s = file_parts[1].split(']')[0].split(',')
-            slices_ = []
-            for s in slices_s:
-                if s == '':
-                    continue
-                elif ':' in s:
-                    s_split = s.split(':')
-                    if len(s_split) == 2:
-                        start_slice = int(s_split[0]) if s_split[0] != '' else None
-                        end_slice = int(s_split[1]) if s_split[1] != '' else None
-                        slices_.append(slice(start_slice, end_slice, None))
-                    elif len(s_split) == 3:
-                        start_slice = int(s_split[0]) if s_split[0] != '' else None
-                        end_slice = int(s_split[1]) if s_split[1] != '' else None
-                        step_slice = int(s_split[2]) if s_split[2] != '' else None
-                        slices_.append(slice(start_slice, end_slice, step_slice))
-                else:
-                    slices_.append(int(s))
+        file_path, slices_ = _parse_slice_spec(file_path)
+
+        path_obj = Path(file_path)
 
         # check if file exists
-        assert
+        assert path_obj.exists(), f'File does not exist: {file_path}'
 
         # verify if file is empty
-        if
+        if path_obj.stat().st_size <= 0:
             return ''
 
         # For common plain-text extensions, avoid Tika overhead
-        ext =
+        ext = path_obj.suffix.lower()
         if ext in {'.txt', '.md', '.py', '.json', '.yaml', '.yml', '.csv', '.tsv', '.log'}:
             try:
-                with open(
+                with path_obj.open(encoding='utf-8', errors='ignore') as f:
                     content = f.read()
                 if content is None:
                     return None
                 # Apply slicing by lines, mirroring the Tika branch
                 lines = content.split('\n')
-
-                new_content = []
-                for s in slices_:
-                    new_content.extend(lines[s])
-                lines = new_content
+                lines = _apply_slices(lines, slices_)
                 content = '\n'.join(lines)
                 content = content.encode('utf8', 'ignore').decode('utf8', 'ignore')
-                return content if not with_metadata else [TextContainer(
+                return content if not with_metadata else [TextContainer(file_id, None, content)]
             except Exception:
                 # Fallback to Tika if plain read fails
                 pass
 
         _ensure_tika_vm()
-        file_ = unpack.from_file(str(
-        if 'content' in file_
-            content = file_['content']
-        else:
-            content = str(file_)
+        file_ = unpack.from_file(str(path_obj))
+        content = file_['content'] if 'content' in file_ else str(file_)
 
         if content is None:
             return None
         content = content.split('\n')
 
-
-        new_content = []
-        for s in slices_:
-            new_content.extend(content[s])
-        content = new_content
+        content = _apply_slices(content, slices_)
         content = '\n'.join(content)
         content = content.encode('utf8', 'ignore').decode('utf8', 'ignore')
-        return content if not with_metadata else [TextContainer(
+        return content if not with_metadata else [TextContainer(file_id, None, content)]
 
 
     def reset_eof_of_pdf_return_stream(self, pdf_stream_in: list):
```
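The rewrite turns `_read_slice_file` into a short pipeline: strip the slice spec, validate via `pathlib`, read common plain-text extensions directly, and only then fall back to Tika. A standalone sketch of the fast path under those assumptions (helper functions as defined in the earlier hunk; `read_plain_text` is an illustrative name, not part of the module):

```python
from pathlib import Path

PLAIN_TEXT_EXTS = {'.txt', '.md', '.py', '.json', '.yaml', '.yml', '.csv', '.tsv', '.log'}

def read_plain_text(file_path: str) -> str | None:
    # Split 'notes.txt[0:10]' into the real path and the optional slices.
    file_path, slices_ = _parse_slice_spec(file_path)
    path_obj = Path(file_path)
    assert path_obj.exists(), f'File does not exist: {file_path}'
    if path_obj.stat().st_size <= 0:
        return ''
    if path_obj.suffix.lower() not in PLAIN_TEXT_EXTS:
        return None  # caller falls back to the Tika branch
    with path_obj.open(encoding='utf-8', errors='ignore') as f:
        lines = f.read().split('\n')
    return '\n'.join(_apply_slices(lines, slices_))
```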
```diff
@@ -127,7 +134,7 @@ class FileEngine(Engine):
         for i, x in enumerate(pdf_stream_in[::-1]):
             if b'%%EOF' in x:
                 actual_line = len(pdf_stream_in)-i
-
+                UserMessage(f'EOF found at line position {-i} = actual {actual_line}, with value {x}')
                 break
 
         # return the list up to that point
@@ -135,31 +142,31 @@ class FileEngine(Engine):
 
     def fix_pdf(self, file_path: str):
         # opens the file for reading
-
+        path_obj = Path(file_path)
+        with path_obj.open('rb') as p:
             txt = (p.readlines())
 
         # get the new list terminating correctly
         txtx = self.reset_eof_of_pdf_return_stream(txt)
 
         # write to new pdf
-        new_file_path = f'{file_path}_fixed.pdf'
-        with open(
+        new_file_path = Path(f'{file_path}_fixed.pdf')
+        with new_file_path.open('wb') as f:
             f.writelines(txtx)
 
-
-        return fixed_pdf
+        return pypdf.PdfReader(str(new_file_path))
 
     def read_text(self, pdf_reader, page_range, argument):
         txt = []
         n_pages = len(pdf_reader.pages)
         with_metadata = argument.kwargs.get('with_metadata', False)
-
+        file_id = Path(argument.prop.prepared_input).stem.replace(' ', '_')
         for i in range(n_pages)[slice(0, n_pages) if page_range is None else page_range]:
             page = pdf_reader.pages[i]
             extracted = page.extract_text()
             extracted = extracted.encode('utf8', 'ignore').decode('utf8', 'ignore')
             if with_metadata:
-                txt.append(TextContainer(
+                txt.append(TextContainer(file_id, str(i), extracted))
             else:
                 txt.append(extracted)
 
@@ -173,17 +180,17 @@ class FileEngine(Engine):
         page_range = None
         if 'slice' in kwargs:
             page_range = kwargs['slice']
-            if isinstance(page_range, tuple
+            if isinstance(page_range, (tuple, list)):
                 page_range = slice(*page_range)
 
         rsp = ''
         try:
-            with
+            with Path(path).open('rb') as f:
                 # creating a pdf reader object
                 pdf_reader = pypdf.PdfReader(f)
                 rsp = self.read_text(pdf_reader, page_range, argument)
         except Exception as e:
-
+            UserMessage(f'Error reading PDF: {e} | {path}')
             if 'fix_pdf' not in kwargs or not kwargs['fix_pdf']:
                 raise e
             fixed_pdf = self.fix_pdf(str(path))
@@ -193,11 +200,11 @@ class FileEngine(Engine):
             try:
                 rsp = self._read_slice_file(path, argument)
             except Exception as e:
-
+                UserMessage(f'Error reading empty file: {e} | {path}')
                 raise e
 
         if rsp is None:
-
+            UserMessage(f'Error reading file - empty result: {path}', raise_with=Exception)
 
         metadata = {}
 
```
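Throughout this release, ad-hoc prints and bare raises are routed through `symai.utils.UserMessage`. Its implementation is not part of this diff; a hypothetical minimal sketch of the contract implied by the call sites (a message plus an optional `raise_with` exception type):

```python
# Hypothetical sketch only; the real symai.utils.UserMessage is not shown in
# this diff. Implied contract: surface the message to the user, and raise it
# wrapped in `raise_with` when that keyword is supplied.
def UserMessage(message: str, raise_with: type[BaseException] | None = None) -> None:
    print(message)  # the real helper presumably routes through symai's console utilities
    if raise_with is not None:
        raise raise_with(message)
```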
symai/backend/engines/imagecaptioning/engine_blip2.py:

```diff
@@ -1,4 +1,3 @@
-from typing import List
 
 import requests
 import torch
@@ -10,6 +9,7 @@ except ImportError:
 
 from PIL import Image
 
+from ....utils import UserMessage
 from ...base import Engine
 from ...settings import SYMAI_CONFIG
 
@@ -43,7 +43,7 @@ class Blip2Engine(Engine):
 
     def forward(self, argument):
         if load_model_and_preprocess is None:
-
+            UserMessage('Blip2 is not installed. Please install it with `pip install symbolicai[blip2]`', raise_with=ImportError)
         if self.model is None:
             self.model, self.vis_processors, self.txt_processors = load_model_and_preprocess(name = self.name_id,
                                                                                              model_type = self.model_id,
@@ -52,7 +52,7 @@ class Blip2Engine(Engine):
 
         image, prompt = argument.prop.prepared_input
         kwargs = argument.kwargs
-        except_remedy = kwargs
+        except_remedy = kwargs.get('except_remedy')
 
         if 'http' in image:
             image = Image.open(requests.get(image, stream=True).raw).convert('RGB')
```
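The `load_model_and_preprocess is None` guard pairs with the `except ImportError:` context visible in the second hunk: the module imports the BLIP-2 loader optionally and defers the failure to `forward`. A sketch of the pattern (the exact import statement sits outside the hunks shown, so the `lavis` path below is an assumption):

```python
# Optional-dependency pattern assumed from the hunk context above.
try:
    from lavis.models import load_model_and_preprocess  # assumed import path
except ImportError:
    load_model_and_preprocess = None

def forward(argument):
    if load_model_and_preprocess is None:
        UserMessage('Blip2 is not installed. Please install it with '
                    '`pip install symbolicai[blip2]`', raise_with=ImportError)
    ...
```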
symai/backend/engines/imagecaptioning/engine_llavacpp_client.py:

```diff
@@ -1,15 +1,16 @@
-import logging
-import requests
-import json
 import io
+import json
+import logging
+from pathlib import Path
 
-
-from requests_toolbelt.multipart.encoder import MultipartEncoder
+import requests
 from PIL.Image import Image
+from requests_toolbelt.multipart.encoder import MultipartEncoder
 
+from ....symbol import Result
+from ....utils import UserMessage
 from ...base import Engine
 from ...settings import SYMAI_CONFIG
-from ....symbol import Result
 
 
 def image_to_byte_array(image: Image, format='PNG') -> bytes:
@@ -18,8 +19,7 @@ def image_to_byte_array(image: Image, format='PNG') -> bytes:
     # image.save expects a file-like as a argument
     image.save(imgByteArr, format=format)
     # Turn the BytesIO object back into a bytes object
-
-    return imgByteArr
+    return imgByteArr.getvalue()
 
 
 class LLaMAResult(Result):
@@ -74,7 +74,7 @@ class LLaMACppClientEngine(Engine):
             im_bytes = image_to_byte_array(image['content'], format=format_)
         else:
             # Convert image to bytes, open as binary
-            with
+            with Path(image['content']).open('rb') as f:
                 im_bytes = f.read()
         # Create multipart/form-data payload
         payload = MultipartEncoder(
@@ -87,7 +87,7 @@ class LLaMACppClientEngine(Engine):
         # Update the headers for multipart/form-data
         headers = {'Content-Type': payload.content_type}
         api = f'http://{self.host}:{self.port}/llava'
-        except_remedy = kwargs
+        except_remedy = kwargs.get('except_remedy')
         try:
             # use http localhost 8000 to send a request to the server
             rsp = requests.post(api, data=payload, headers=headers, timeout=self.timeout)
@@ -95,7 +95,8 @@ class LLaMACppClientEngine(Engine):
         except Exception as e:
             if except_remedy is None:
                 raise e
-            callback
+            def callback():
+                return requests.post(api, data=payload, headers=headers, timeout=self.timeout)
             res = except_remedy(self, e, callback, argument)
 
         metadata = {}
@@ -105,17 +106,15 @@ class LLaMACppClientEngine(Engine):
         output = rsp if isinstance(prompts, list) else rsp[0]
         return output, metadata
 
-    def
-    if argument.prop.raw_input:
-
-
-
-
-
-        user: str = ""
-        system: str = ""
-        system = f'{system}\n' if system and len(system) > 0 else ''
+    def _handle_raw_input(self, argument) -> bool:
+        if not argument.prop.raw_input:
+            return False
+        if not argument.prop.processed_input:
+            UserMessage('Need to provide a prompt instruction to the engine if raw_input is enabled.', raise_with=ValueError)
+        argument.prop.prepared_input = argument.prop.processed_input
+        return True
 
+    def _append_context_sections(self, system: str, argument) -> str:
         ref = argument.prop.instance
         static_ctxt, dyn_ctxt = ref.global_context
         if len(static_ctxt) > 0:
@@ -126,36 +125,53 @@ class LLaMACppClientEngine(Engine):
 
         payload = argument.prop.payload
         if argument.prop.payload:
-            system += f"[ADDITIONAL CONTEXT]\n{
+            system += f"[ADDITIONAL CONTEXT]\n{payload!s}\n\n"
 
-        examples:
+        examples: list[str] = argument.prop.examples
         if examples and len(examples) > 0:
-            system += f"[EXAMPLES]\n{
+            system += f"[EXAMPLES]\n{examples!s}\n\n"
+
+        return system
 
+    def _build_user_instruction(self, argument) -> str:
+        user = ""
         if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
             val = str(argument.prop.prompt)
-            # in this engine, instructions are considered as user prompts
             user += f"[INSTRUCTION]\n{val}"
+        return user
 
-
-
+    def _extract_system_instructions(self, argument, system: str, suffix: str) -> tuple[str, str]:
         if '[SYSTEM_INSTRUCTION::]: <<<' in suffix and argument.prop.parse_system_instructions:
             parts = suffix.split('\n>>>\n')
-
-
-
-
-
-                    c += 1
+            consumed = 0
+            for part in parts:
+                if 'SYSTEM_INSTRUCTION' in part:
+                    system += f"{part}\n"
+                    consumed += 1
                 else:
                     break
-
-
-        user += f"{suffix}"
+            suffix = '\n>>>\n'.join(parts[consumed:])
+        return system, suffix
 
+    def _append_template_suffix(self, user: str, argument) -> str:
         if argument.prop.template_suffix:
-            user += f"\n[[PLACEHOLDER]]\n{
-        user +=
+            user += f"\n[[PLACEHOLDER]]\n{argument.prop.template_suffix!s}\n\n"
+        user += "Only generate content for the placeholder `[[PLACEHOLDER]]` following the instructions and context information. Do NOT write `[[PLACEHOLDER]]` or anything else in your output.\n\n"
+        return user
+
+    def prepare(self, argument):
+        if self._handle_raw_input(argument):
+            return
+
+        system: str = ""
+        system = f'{system}\n' if system and len(system) > 0 else ''
+        system = self._append_context_sections(system, argument)
+
+        user = self._build_user_instruction(argument)
+        suffix: str = str(argument.prop.processed_input)
+        system, suffix = self._extract_system_instructions(argument, system, suffix)
+        user += f"{suffix}"
+        user = self._append_template_suffix(user, argument)
 
         user_prompt = { "role": "user", "content": user }
         argument.prop.prepared_input = [
```
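Replacing the truncated `callback` assignment with a nested function makes the retry hook explicit: when a request fails and the caller supplied an `except_remedy`, the engine hands it the error plus a zero-argument callable that re-issues the original POST. A sketch of a remedy handler matching the call `except_remedy(self, e, callback, argument)` (the handler name and retry policy are illustrative, not part of the library):

```python
# Sketch of a remedy handler for the hook signature used above.
def log_and_retry(engine, error, callback, argument):
    UserMessage(f'{type(error).__name__} from llava endpoint, retrying once: {error}')
    return callback()  # re-runs requests.post(api, data=payload, ...)
```

Passing `except_remedy=log_and_retry` through the engine kwargs would then retry once instead of re-raising.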
symai/backend/engines/index/engine_pinecone.py:

```diff
@@ -1,17 +1,16 @@
+import contextlib
 import itertools
 import warnings
-import numpy as np
 
-warnings.filterwarnings('ignore', module='pinecone')
-try:
-    from pinecone import Pinecone, ServerlessSpec
-except:
-    pass
-
-from ...base import Engine
-from ...settings import SYMAI_CONFIG
 from .... import core_ext
 from ....symbol import Result
+from ....utils import UserMessage
+from ...base import Engine
+from ...settings import SYMAI_CONFIG
+
+warnings.filterwarnings('ignore', module='pinecone')
+with contextlib.suppress(BaseException):
+    from pinecone import Pinecone, ServerlessSpec
 
 
 def chunks(iterable, batch_size=100):
@@ -47,23 +46,23 @@ class PineconeResult(Result):
             return
 
         for i, match in enumerate(self.value):
-
-            if
-                m =
+            match_value = match.strip()
+            if match_value.startswith('# ----[FILE_START]') and '# ----[FILE_END]' in match_value:
+                m = match_value.split('[FILE_CONTENT]:')[-1].strip()
                 splits = m.split('# ----[FILE_END]')
-                assert len(splits) >= 2, 'Invalid file format: {}'
+                assert len(splits) >= 2, f'Invalid file format: {splits}'
                 content = splits[0]
                 file_name = ','.join(splits[1:]) # TODO: check why there are multiple file names
                 yield file_name.strip(), content.strip()
             else:
-                yield i+1,
+                yield i+1, match_value
 
     def __str__(self):
         str_view = ''
         for filename, content in self._unpack_matches():
             # indent each line of the content
-
-            str_view += f'* {filename}\n{
+            content_view = '\n'.join([' ' + line for line in content.split('\n')])
+            str_view += f'* {filename}\n{content_view}\n\n'
         return f'''
 [RESULT]
 {'-=-' * 13}
@@ -138,7 +137,7 @@ class PineconeIndexEngine(Engine):
     def id(self) -> str:
        if SYMAI_CONFIG['INDEXING_ENGINE_API_KEY']:
             if Pinecone is None:
-
+                UserMessage('Pinecone is not installed. Please install it with `pip install symbolicai[pinecone]`.')
             return 'index'
         return super().id() # default to unregistered
 
@@ -150,13 +149,13 @@ class PineconeIndexEngine(Engine):
         self.environment = kwargs['INDEXING_ENGINE_ENVIRONMENT']
 
     def _configure_index(self, **kwargs):
-        index_name = kwargs
+        index_name = kwargs.get('index_name', self.index_name)
 
-        del_ = kwargs
+        del_ = kwargs.get('index_del', False)
         if self.index is not None and del_:
             self.pinecone.delete_index(index_name)
 
-        get_ = kwargs
+        get_ = kwargs.get('index_get', False)
         if self.index is not None and get_:
             self.index = self.pinecone.Index(name=index_name)
 
@@ -184,9 +183,9 @@ class PineconeIndexEngine(Engine):
             self._configure_index(**kwargs)
 
         if operation == 'search':
-            index_top_k
-            index_values
-            index_metadata = kwargs
+            index_top_k = kwargs.get('index_top_k', self.index_top_k)
+            index_values = kwargs.get('index_values', self.index_values)
+            index_metadata = kwargs.get('index_metadata', self.index_metadata)
             rsp = self._query(embedding, index_top_k, index_values, index_metadata)
 
         elif operation == 'add':
@@ -197,7 +196,7 @@ class PineconeIndexEngine(Engine):
             self._configure_index(**kwargs)
 
         else:
-
+            UserMessage('Invalid operation', raise_with=ValueError)
 
         metadata = {}
 
```
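The `try/except: pass` around the Pinecone import becomes `contextlib.suppress(BaseException)`, and the later `if Pinecone is None:` check reports the missing extra. Note that a suppressed failed import leaves the name unbound, so a `None` fallback must exist somewhere outside these hunks; the sketch below adds one explicitly (an assumption, since that binding is not visible in the diff):

```python
import contextlib

Pinecone = None  # assumed fallback binding; not visible in the hunks above
with contextlib.suppress(BaseException):
    from pinecone import Pinecone, ServerlessSpec

if Pinecone is None:
    UserMessage('Pinecone is not installed. Please install it with '
                '`pip install symbolicai[pinecone]`.')
```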
symai/backend/engines/index/engine_vectordb.py:

```diff
@@ -1,9 +1,10 @@
 import itertools
 from copy import deepcopy
+from typing import ClassVar
 
 from ....extended.vectordb import VectorDB
 from ....symbol import Result
-from ....utils import
+from ....utils import UserMessage
 from ...base import Engine
 from ...settings import SYMAI_CONFIG
 
@@ -40,23 +41,23 @@ class VectorDBResult(Result):
         if not self.value:
             return
         for i, match in enumerate(self.value):
-
-            if
-                m =
+            match_value = match.strip()
+            if match_value.startswith('# ----[FILE_START]') and '# ----[FILE_END]' in match_value:
+                m = match_value.split('[FILE_CONTENT]:')[-1].strip()
                 splits = m.split('# ----[FILE_END]')
-                assert len(splits) >= 2, 'Invalid file format: {}'
+                assert len(splits) >= 2, f'Invalid file format: {splits}'
                 content = splits[0]
                 file_name = ','.join(splits[1:]) # TODO: check why there are multiple file names
                 yield file_name.strip(), content.strip()
             else:
-                yield i+1,
+                yield i+1, match_value
 
     def __str__(self):
         str_view = ''
         for filename, content in self._unpack_matches():
             # indent each line of the content
-
-            str_view += f'* {filename}\n{
+            content_view = '\n'.join([' ' + line for line in content.split('\n')])
+            str_view += f'* {filename}\n{content_view}\n\n'
         return f'''
 [RESULT]
 {'-=-' * 13}
@@ -85,8 +86,8 @@ class VectorDBIndexEngine(Engine):
     _default_index_dims = 768
     _default_index_top_k = 5
     _default_index_metric = 'cosine'
-    _index_dict = {}
-    _index_storage_file = None
+    _index_dict: ClassVar[dict[str, object]] = {}
+    _index_storage_file: ClassVar[str | None] = None
     def __init__(
         self,
         index_name=_default_index_name,
@@ -95,7 +96,7 @@ class VectorDBIndexEngine(Engine):
         index_metric=_default_index_metric,
         index_dict=_index_dict,
         index_storage_file=_index_storage_file,
-        **
+        **_kwargs
     ):
         super().__init__()
         self.config = deepcopy(SYMAI_CONFIG)
@@ -131,7 +132,7 @@ class VectorDBIndexEngine(Engine):
 
         if operation == 'search':
             if isinstance(query, list) and len(query) > 1:
-
+                UserMessage('VectorDB indexing engine does not support multiple queries. Pass a single string query instead.', raise_with=ValueError)
             query_vector = self.index[index_name].embedding_function([query])[0]
             results = self.index[index_name](vector=query_vector, top_k=top_k, return_similarities=similarities)
             rsp = [{'metadata': {'text': result}} for result in results]
@@ -154,9 +155,9 @@ class VectorDBIndexEngine(Engine):
             elif kwargs.get('purge', maybe_as_prompt == 'purge'):
                 self.purge(index_name)
             else:
-
+                UserMessage('Invalid configuration; please use either "load", "save", or "purge".', raise_with=ValueError)
         else:
-
+            UserMessage('Invalid operation; please use either "search", "add", or "config".', raise_with=ValueError)
 
         metadata = {}
         rsp = VectorDBResult(rsp, query[0], None)
@@ -176,6 +177,7 @@ class VectorDBIndexEngine(Engine):
     def prepare(self, argument):
         assert not argument.prop.processed_input, 'VectorDB indexing engine does not support processed_input.'
         argument.prop.prepared_input = argument.prop.prompt
+        argument.prop.limit = 1
 
     def load(self, index_name, storage_file, index_dims, top_k, metric):
         self.index[index_name] = VectorDB(
```
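Annotating the shared class attributes with `ClassVar` documents that `_index_dict` and `_index_storage_file` are process-wide state rather than per-instance fields (the pattern linters such as Ruff flag as RUF012 for mutable class defaults). A minimal sketch of the behavior, with an illustrative class name:

```python
from typing import ClassVar

class EngineSketch:
    # One registry shared by every instance of the class.
    _index_dict: ClassVar[dict[str, object]] = {}

e1, e2 = EngineSketch(), EngineSketch()
e1._index_dict['docs'] = object()
assert 'docs' in e2._index_dict  # mutation is visible across instances
```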
|