symbolicai 1.3.0__tar.gz → 1.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {symbolicai-1.3.0 → symbolicai-1.5.0}/AGENTS.md +1 -1
- {symbolicai-1.3.0 → symbolicai-1.5.0}/PKG-INFO +4 -1
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/indexing_engine.md +50 -8
- symbolicai-1.5.0/docs/source/ENGINES/scrape_engine.md +143 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/search_engine.md +72 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/pyproject.toml +4 -1
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/__init__.py +1 -1
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/index/engine_qdrant.py +222 -10
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/scrape/engine_requests.py +39 -10
- symbolicai-1.5.0/symai/backend/engines/search/__init__.py +13 -0
- symbolicai-1.5.0/symai/backend/engines/search/engine_firecrawl.py +333 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_parallel.py +5 -5
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/components.py +9 -3
- symbolicai-1.5.0/symai/extended/interfaces/firecrawl.py +30 -0
- symbolicai-1.5.0/symai/extended/interfaces/local_search.py +57 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/parallel.py +5 -5
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/functional.py +3 -4
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symbolicai.egg-info/PKG-INFO +4 -1
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symbolicai.egg-info/SOURCES.txt +6 -1
- symbolicai-1.5.0/symbolicai.egg-info/dependency_links.txt +1 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symbolicai.egg-info/requires.txt +4 -0
- symbolicai-1.5.0/tests/data/symmetry_breaking.pdf +0 -0
- symbolicai-1.5.0/uv.lock +9197 -0
- symbolicai-1.3.0/docs/source/ENGINES/scrape_engine.md +0 -43
- symbolicai-1.3.0/symai/misc/__init__.py +0 -0
- symbolicai-1.3.0/uv.lock +0 -7673
- {symbolicai-1.3.0 → symbolicai-1.5.0}/.gitbook.yaml +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/.github/FUNDING.yml +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/.gitignore +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/.symai/symsh.config.json +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/CITATION.cff +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/Dockerfile +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/LICENSE +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/MANIFEST.in +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/README.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/app.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/banner.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/cat.jpg +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/cat.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/contract_flow.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img1.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img10.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img2.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img3.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img4.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img5.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img6.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img7.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img8.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img9.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/preview.gif +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/screen1.jpeg +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/symai_logo.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/symsh.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid1.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid2.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid3.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid4.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid5.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid6.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/results/news.html +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/results/news.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/results/news_prev.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/bin/install.ps1 +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/bin/install.sh +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/build.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docker-compose.yml +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/clip_engine.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/custom_engine.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/drawing_engine.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/file_engine.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/local_engine.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/neurosymbolic_engine.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/ocr_engine.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/speech_to_text_engine.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/symbolic_engine.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/contracts.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/error_handling.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/expressions.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/import.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/operations.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/primitives.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/INSTALLATION.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/INTRODUCTION.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/QUICKSTART.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/SUMMARY.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TOOLS/chatbot.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TOOLS/packages.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TOOLS/shell.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/chatbot.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/context.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/data_query.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/video_tutorials.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/environment.yml +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/examples/contracts.ipynb +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/examples/primitives.ipynb +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/icon_converter.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/installer.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/Basics.ipynb +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/ChatBot.ipynb +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/Conversation.ipynb +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/Indexer.ipynb +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/News.ipynb +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/Queries.ipynb +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/TTS_Persona.ipynb +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/Lean engine.png +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/a_star.txt +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/abstract.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/audio.mp3 +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/dbpedia_samples.jsonl +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/dbpedia_samples_prepared_train.jsonl +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/dbpedia_samples_prepared_valid.jsonl +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/demo.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/demo_strategy.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/docs.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/einsteins_puzzle.txt +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/file.json +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/lean.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/news.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/paper.pdf +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/paper.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/sql.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/public/eai.svg +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/pytest.ini +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/ruff.toml +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/setup.cfg +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/setup.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/TERMS_OF_SERVICE.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/base.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/drawing/engine_bfl.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/drawing/engine_gpt_image.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/embedding/engine_llama_cpp.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/embedding/engine_openai.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/execute/engine_python.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/files/engine_io.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/imagecaptioning/engine_blip2.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/index/engine_pinecone.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/index/engine_vectordb.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/lean/engine_lean4.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_cerebras.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_groq.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_huggingface.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_llama_cpp.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_openai_responses.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/ocr/engine_apilayer.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/output/engine_stdout.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_openai.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_perplexity.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_serpapi.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/speech_to_text/engine_local_whisper.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/symbolic/engine_wolframalpha.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/text_to_speech/engine_openai.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/text_vision/engine_clip.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/userinput/engine_console.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/anthropic.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/cerebras.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/deepseek.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/google.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/groq.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/openai.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/settings.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/chat.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/collect/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/collect/dynamic.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/collect/pipeline.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/collect/stats.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/constraints.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/context.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/core.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/core_ext.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/endpoints/__init__py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/endpoints/api.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/exceptions.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/.DS_Store +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/api_builder.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/arxiv_pdf_parser.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/bibtex_parser.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/conversation.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/document.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/file_merger.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/graph.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/html_style_template.py +0 -0
- {symbolicai-1.3.0/symai/server → symbolicai-1.5.0/symai/extended/interfaces}/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/blip_2.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/clip.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/console.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/dall_e.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/file.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/flux.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/gpt_image.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/input.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/llava.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/naive_scrape.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/naive_vectordb.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/ocr.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/openai_search.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/perplexity.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/pinecone.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/python.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/serpapi.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/terminal.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/tts.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/whisper.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/wolframalpha.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/metrics/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/metrics/similarity.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/os_command.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/packages/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/packages/symdev.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/packages/sympkg.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/packages/symrun.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/repo_cloner.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/seo_query_optimizer.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/solver.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/summarizer.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/taypan_interpreter.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/vectordb.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/formatter/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/formatter/emoji.pytxt +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/formatter/formatter.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/formatter/regex.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/imports.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/interfaces.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/memory.py +0 -0
- {symbolicai-1.3.0/symai/extended/interfaces → symbolicai-1.5.0/symai/menu}/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/menu/screen.py +0 -0
- {symbolicai-1.3.0/symai/menu → symbolicai-1.5.0/symai/misc}/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/misc/console.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/misc/loader.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/models/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/models/base.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/models/errors.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/ops/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/ops/measures.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/ops/primitives.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/post_processors.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/pre_processors.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/processor.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/prompts.py +0 -0
- /symbolicai-1.3.0/symbolicai.egg-info/dependency_links.txt → /symbolicai-1.5.0/symai/server/__init__.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/server/huggingface_server.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/server/llama_cpp_server.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/server/qdrant_server.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/shell.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/shellsv.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/strategy.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/symbol.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/symsh.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/utils.py +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symbolicai.egg-info/entry_points.txt +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/symbolicai.egg-info/top_level.txt +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/tests/README.md +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/tests/data/audio.mp3 +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/tests/data/pg1727.txt +0 -0
- {symbolicai-1.3.0 → symbolicai-1.5.0}/trusted_repos.yml +0 -0
```diff
--- symbolicai-1.3.0/AGENTS.md
+++ symbolicai-1.5.0/AGENTS.md
@@ -66,7 +66,7 @@ CLI entrypoints (after install): `symchat`, `symsh`, `symconfig`, `symserver`.
 - Treat type hints as contracts; do not add runtime type checks except at trust boundaries (CLI/env, JSON/network, disk).
 - Prefer minimal diffs; edit existing code over adding new files unless necessary.
 - Do not add/modify `tests/` or run tests unless explicitly requested; if requested, run the narrowest relevant `pytest` command.
-- When you change Python files
+- When you change Python files outside `tests/`: run `ruff check <changed_files> --output-format concise --config ruff.toml` and fix issues.
 - Keep search local-first (`rg`); follow imports instead of repo-wide “random scanning”.
 - If adding a regex, include a short comment explaining what it matches.
 - Update `TODO.md` when tasks are completed, added, or re-scoped.
```
```diff
--- symbolicai-1.3.0/PKG-INFO
+++ symbolicai-1.5.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: symbolicai
-Version: 1.3.0
+Version: 1.5.0
 Summary: A Neurosymbolic Perspective on Large Language Models
 Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
 License: BSD 3-Clause License
@@ -113,6 +113,7 @@ Requires-Dist: openai-whisper>=20240930; extra == "whisper"
 Requires-Dist: numba>=0.62.1; extra == "whisper"
 Requires-Dist: llvmlite>=0.45.1; extra == "whisper"
 Provides-Extra: search
+Requires-Dist: firecrawl-py>=4.12.0; extra == "search"
 Requires-Dist: parallel-web>=0.3.3; extra == "search"
 Provides-Extra: serpapi
 Requires-Dist: google_search_results>=2.4.2; extra == "serpapi"
@@ -136,6 +137,8 @@ Requires-Dist: symbolicai[serpapi]; extra == "all"
 Requires-Dist: symbolicai[services]; extra == "all"
 Requires-Dist: symbolicai[solver]; extra == "all"
 Requires-Dist: symbolicai[qdrant]; extra == "all"
+Provides-Extra: dev
+Requires-Dist: pytest-asyncio>=1.3.0; extra == "dev"
 Dynamic: license-file
 
 # **SymbolicAI: A neuro-symbolic perspective on LLMs**
```
````diff
--- symbolicai-1.3.0/docs/source/ENGINES/indexing_engine.md
+++ symbolicai-1.5.0/docs/source/ENGINES/indexing_engine.md
@@ -31,19 +31,22 @@ The Qdrant engine provides a production-ready vector database for scalable RAG a
 
 ### Setup
 
-#### Option 1: Local Qdrant Server
+#### Option 1: Local Qdrant Server (via symserver)
 
-Start
+Start Qdrant using the `symserver` CLI (Docker by default).
 
 ```bash
-#
-
+# Pull the image once (recommended)
+docker pull qdrant/qdrant:latest
 
-#
-
+# Docker (default): set INDEXING_ENGINE so symserver selects Qdrant
+INDEXING_ENGINE=qdrant symserver --host 0.0.0.0 --port 6333 --storage-path ./qdrant_storage
 
-#
-
+# Use native binary
+INDEXING_ENGINE=qdrant symserver --env binary --binary-path /path/to/qdrant --port 6333 --storage-path ./qdrant_storage
+
+# Detach Docker if desired
+INDEXING_ENGINE=qdrant symserver --docker-detach
 ```
 
 #### Option 2: Cloud Qdrant
@@ -103,6 +106,43 @@ async def basic_usage():
 asyncio.run(basic_usage())
 ```
 
+### Local Search with citations
+
+If you need citation-formatted results compatible with `parallel.search`, use the `local_search` interface. It embeds the query locally, queries Qdrant, and returns a `SearchResult` (with `value` and `citations`) instead of raw `ScoredPoint` objects:
+
+Local search accepts the same args as passed to Qdrant directly: `collection_name`/`index_name`, `limit`/`top_k`/`index_top_k`, `score_threshold`, `query_filter` (dict or Qdrant `Filter`), and any extra Qdrant search kwargs. Citation fields are derived from Qdrant payloads: the excerpt uses `payload["text"]` (or `content`), the URL is resolved from `payload["source"]`/`url`/`file_path`/`path` and is always returned as an absolute `file://` URI (relative inputs resolve against the current working directory), and the title is the stem of that path (PDF pages append `#p{page}` when provided). Each matching chunk yields its own citation; multiple citations can point to the same file.
+
+If you want a stable source header for each chunk, store a `source_id` or `chunk_id` in the payload (otherwise the Qdrant point id is used).
+
+Example:
+
+```python
+from symai.interfaces import Interface
+from qdrant_client.http import models
+
+search = Interface("local_search", index_name="my_collection")
+
+qdrant_filter = models.Filter(
+    must=[
+        models.FieldCondition(key="category", match=models.MatchValue(value="AI"))
+    ]
+)
+
+result = search.search(
+    "neural networks and transformers",
+    collection_name="my_collection",  # alias: index_name
+    limit=5,  # aliases: top_k, index_top_k
+    score_threshold=0.35,
+    query_filter=qdrant_filter,  # or a simple dict like {"category": "AI"}
+    with_payload=True,  # passed through to Qdrant query_points
+    with_vectors=False,  # optional; defaults follow engine config
+    # any other Qdrant query_points kwargs can be added here
+)
+
+print(result.value)  # formatted text with [1], [2] markers
+print(result.get_citations())  # list of Citation objects
+```
+
 ### Collection Management
 
 Create and manage collections programmatically:
@@ -156,6 +196,8 @@ async def add_documents():
         document_path="/path/to/document.pdf",
         metadata={"source": "document.pdf"}
     )
+    # Note: document_path indexing stores the absolute file path in payload["source"]
+    # so local_search citations resolve to file:// URIs.
 
     # Chunk and index from a URL
     num_chunks = await engine.chunk_and_upsert(
````
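The citation fields described in the new `local_search` docs are derived purely from each chunk's Qdrant payload. A minimal sketch of how a payload might be shaped so those documented rules apply; the collection, path, and values here are hypothetical, and the URL/title lines simply mirror the stated resolution rules with plain `pathlib`:

```python
from pathlib import Path

# Hypothetical chunk payload: per the docs above, "text" becomes the excerpt,
# "source" resolves to an absolute file:// URI, "page" appends #p{page} for PDFs,
# and "chunk_id" provides a stable source header.
payload = {
    "text": "Transformers rely on self-attention to mix token information...",
    "source": "data/symmetry_breaking.pdf",  # relative; resolves against the CWD
    "page": 3,
    "chunk_id": "symmetry_breaking-p3-c1",
}

url = Path(payload["source"]).resolve().as_uri()              # file:///.../symmetry_breaking.pdf
title = f"{Path(payload['source']).stem}#p{payload['page']}"  # symmetry_breaking#p3
print(url, title)
```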
````diff
--- /dev/null
+++ symbolicai-1.5.0/docs/source/ENGINES/scrape_engine.md
@@ -0,0 +1,143 @@
+# Scrape Engine
+
+## Naive Scrape
+
+To access data from the web, we can use the `naive_scrape` interface. The engine underneath is very lightweight and can be used to scrape data from websites. It is based on the `requests` library, as well as `trafilatura` for output formatting, and `bs4` for HTML parsing. `trafilatura` currently supports the following output formats: `json`, `csv`, `html`, `markdown`, `text`, `xml`
+
+```python
+from symai.interfaces import Interface
+
+scraper = Interface("naive_scrape")
+url = "https://docs.astral.sh/uv/guides/scripts/#next-steps"
+res = scraper(url)
+```
+
+## Parallel (Parallel.ai)
+
+The Parallel.ai integration routes scrape calls through the official `parallel-web` SDK and can handle PDFs, JavaScript-heavy feeds, and standard HTML pages in the same workflow. Instantiate the Parallel interface and call `.scrape(...)` with the target URL. The engine detects scrape requests automatically whenever a URL is supplied.
+
+```python
+from symai.extended import Interface
+
+scraper = Interface("parallel")
+article = scraper.scrape(
+    "https://trafilatura.readthedocs.io/en/latest/crawls.html",
+    full_content=True,  # optional: request full document text
+    excerpts=True,  # optional: default True, retain excerpt snippets
+    objective="Summarize crawl guidance for internal notes."
+)
+print(str(article))
+```
+
+Configuration requires a Parallel API key and the Parallel model token. Add the following to your settings:
+
+```bash
+{
+    …
+    "SEARCH_ENGINE_API_KEY": "…",
+    "SEARCH_ENGINE_MODEL": "parallel"
+    …
+}
+```
+
+When invoked with a URL, the engine hits Parallel's Extract API and returns an `ExtractResult`. The result string joins excerpts or the full content if requested. Because processing is offloaded to Parallel's hosted infrastructure, the engine remains reliable on dynamic pages that the naive scraper cannot render. Install the dependency with `pip install parallel-web` before enabling this engine.
+
+## Firecrawl
+
+Firecrawl.dev specializes in reliable web scraping with automatic handling of JavaScript-rendered content, proxies, and anti-bot mechanisms. It converts web pages into clean formats suitable for LLM consumption and supports advanced features like actions, caching, and location-based scraping.
+
+### Examples
+
+```python
+from symai.extended import Interface
+
+scraper = Interface("firecrawl")
+
+# Example 1: Basic webpage scraping
+content = scraper.scrape(
+    "https://docs.firecrawl.dev/introduction",
+    formats=["markdown"]
+)
+print(content)
+
+# Example 2: PDF scraping with content extraction and trimming
+pdf_full = scraper.scrape(
+    "https://pmc.ncbi.nlm.nih.gov/articles/PMC7231600",
+    only_main_content=True,
+    formats=["markdown"],
+    proxy="auto"
+)
+# Trim locally if needed
+pdf_trimmed = str(pdf_full)[:100]
+
+# Note: JS-heavy sites like Twitter/LinkedIn are currently not fully supported
+# They typically return 403 Forbidden errors (may vary by subscription tier)
+```
+
+### Configuration
+
+Enable the engine by configuring Firecrawl credentials:
+
+```bash
+{
+    "SEARCH_ENGINE_API_KEY": "fc-your-api-key",
+    "SEARCH_ENGINE_MODEL": "firecrawl"
+}
+```
+
+### JSON Schema Extraction
+
+Firecrawl supports structured data extraction using JSON schemas. This is useful for extracting specific fields from web pages using LLM-powered extraction:
+
+```python
+from pydantic import Field
+from symai.extended import Interface
+from symai.models import LLMDataModel
+
+class MetadataModel(LLMDataModel):
+    """Bibliographic metadata extracted from a source document."""
+    title: str = Field(description="Title of the source.")
+    year: str = Field(description="Publication year (4 digits) or Unknown.")
+    authors: list[str] = Field(default_factory=list, description="List of authors.")
+    doi: str | None = Field(default=None, description="DOI if available.")
+
+# Build JSON format config from Pydantic schema
+schema = MetadataModel.model_json_schema()
+json_format = {
+    "type": "json",
+    "prompt": "Extract bibliographic metadata from this academic paper.",
+    "schema": schema,
+}
+
+scraper = Interface("firecrawl")
+result = scraper.scrape(
+    "https://journals.physiology.org/doi/full/10.1152/ajpregu.00051.2002",
+    formats=[json_format],
+    proxy="auto"
+)
+
+# Access extracted data as dict
+extracted = result.raw["json"]
+metadata = MetadataModel(**extracted)
+print(metadata.model_dump())
+
+# Or as JSON string
+print(str(result))
+```
+
+### Supported Parameters
+
+The engine supports many parameters (passed as kwargs). Common ones include:
+- **formats**: Output formats (["markdown"], ["html"], ["rawHtml"])
+- **only_main_content**: Extract main content only (boolean)
+- **proxy**: Proxy mode ("basic", "stealth", "auto")
+- **location**: Geographic location object with optional country and languages
+  - Example: `{"country": "US"}` or `{"country": "RO", "languages": ["ro"]}`
+- **maxAge**: Cache duration in seconds (integer)
+- **storeInCache**: Enable caching (boolean)
+- **actions**: Page interactions before scraping (list of action objects)
+  - Example: `[{"type": "wait", "milliseconds": 2000}]`
+  - Example: `[{"type": "click", "selector": ".button"}]`
+  - Example: `[{"type": "scroll", "direction": "down", "amount": 500}]`
+
+Check the Firecrawl v2 API documentation for the complete list of available parameters and action types.
````
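The new `scrape_engine.md` lists `trafilatura`'s output formats without showing them in use. A direct `trafilatura` sketch, not the symai interface (note the library spells the plain-text format `txt`; the URL is taken from the doc above):

```python
import trafilatura

# Fetch once, then extract into each of the formats the doc enumerates.
html = trafilatura.fetch_url("https://docs.astral.sh/uv/guides/scripts/#next-steps")
for fmt in ("json", "csv", "html", "markdown", "txt", "xml"):
    extracted = trafilatura.extract(html, output_format=fmt)
    print(fmt, len(extracted or ""))
```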
````diff
--- symbolicai-1.3.0/docs/source/ENGINES/search_engine.md
+++ symbolicai-1.5.0/docs/source/ENGINES/search_engine.md
@@ -152,3 +152,75 @@ Here's how to configure the OpenAI search engine:
 ```
 
 This engine calls the OpenAI Responses API under the hood. When you target a reasoning-capable model, pass a `reasoning` dictionary matching the Responses payload schema (for example `{"effort": "low", "summary": "auto"}`). If omitted, the engine falls back to the default effort/summary settings shown above.
+
+## Firecrawl
+Firecrawl.dev provides web scraping and search capabilities with built-in handling of dynamic JavaScript content and anti-bot mechanisms. The engine converts web pages into clean markdown and can perform web searches across multiple sources with advanced filtering and content extraction.
+
+### Comprehensive Search Example
+
+```python
+from symai.extended import Interface
+
+engine = Interface("firecrawl")
+
+# Example 1: Location-aware search with language, scraping, and citations
+result = engine.search(
+    "who is nicusor dan",
+    limit=5,
+    location="Romania",
+    lang="ro",
+    sources=["web"],
+    formats=["markdown"],
+    only_main_content=True,
+    proxy="stealth"
+)
+
+# Access structured citations (similar to parallel.ai)
+citations = result.get_citations()
+for citation in citations:
+    print(f"[{citation.id}] {citation.title}: {citation.url}")
+
+# Example 2: Domain-filtered search with character limits
+domains = ["arxiv.org", "nature.com"]
+filters = " OR ".join(f"site:{domain}" for domain in domains)
+query = f"({filters}) what is thermodynamic computing"
+
+result = engine.search(
+    query,
+    limit=10,
+    max_chars_per_result=500,
+    categories=["research"],
+    formats=["markdown"],
+    proxy="basic"
+)
+print(result)
+```
+
+### Configuration
+
+Enable the engine by configuring Firecrawl credentials:
+
+```bash
+{
+    "SEARCH_ENGINE_API_KEY": "fc-your-api-key",
+    "SEARCH_ENGINE_MODEL": "firecrawl"
+}
+```
+
+### Supported Parameters
+
+The engine supports many parameters (passed as kwargs). Common ones include:
+- **limit**: Max number of results
+- **location**: Country code string for search (e.g., "Romania", "Germany")
+- **lang**: Language code string for search (e.g., "ro", "es") - hint, not enforcement
+- **sources**: List of sources (["web"], ["news"], ["images"])
+- **categories**: Content types (["research"], ["github"], ["pdf"])
+- **tbs**: Time-based filter (e.g., "qdr:d" for past day)
+- **formats**: Output formats for scraped content (["markdown"], ["html"])
+- **only_main_content**: Extract main content only when scraping (boolean)
+- **max_chars_per_result**: Truncate results locally (integer)
+- **proxy**: Proxy mode for scraping ("basic", "stealth", "auto")
+- **scrape_location**: Location object for scraping with optional country and languages
+  - Example: `{"country": "US"}` or `{"country": "RO", "languages": ["ro"]}`
+
+Check the Firecrawl v2 API documentation for the complete list of available parameters.
````
```diff
--- symbolicai-1.3.0/pyproject.toml
+++ symbolicai-1.5.0/pyproject.toml
@@ -78,7 +78,7 @@ scrape = ["beautifulsoup4>=4.12.3", "trafilatura>=2.0.0", "pdfminer.six",
 llama_cpp = ["llama-cpp-python[server]>=0.3.7"] # handle separately since this dependency may not compile and require special maintenance
 wolframalpha = ["wolframalpha>=5.0.0"]
 whisper = ["openai-whisper>=20240930", "numba>=0.62.1", "llvmlite>=0.45.1"]
-search = ["parallel-web>=0.3.3"]
+search = ["firecrawl-py>=4.12.0", "parallel-web>=0.3.3"]
 serpapi = ["google_search_results>=2.4.2"]
 services = ["fastapi>=0.110.0", "redis>=5.0.2", "uvicorn>=0.27.1"]
 solver = ["z3-solver>=4.12.6.0"]
@@ -94,6 +94,9 @@ all = [
     "symbolicai[solver]",
     "symbolicai[qdrant]"
 ]
+dev = [
+    "pytest-asyncio>=1.3.0",
+]
 
 [tool.setuptools.dynamic]
 version = {attr = "symai.SYMAI_VERSION"}
```
```diff
--- symbolicai-1.3.0/symai/backend/engines/index/engine_qdrant.py
+++ symbolicai-1.5.0/symai/backend/engines/index/engine_qdrant.py
@@ -4,8 +4,10 @@ import tempfile
 import urllib.request
 import uuid
 import warnings
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any
+from urllib.parse import urlparse
 
 import numpy as np
 
@@ -148,6 +150,108 @@ Matches:
         return f"<ul>{doc_str}</ul>"
 
 
+@dataclass
+class Citation:
+    id: int
+    title: str
+    url: str
+    start: int
+    end: int
+
+    def __hash__(self):
+        return hash((self.url,))
+
+
+class SearchResult(Result):
+    def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
+        super().__init__(value, **kwargs)
+        if isinstance(value, dict) and value.get("error"):
+            UserMessage(value["error"], raise_with=ValueError)
+        results = self._coerce_results(value)
+        text, citations = self._build_text_and_citations(results)
+        self._value = text
+        self._citations = citations
+
+    def _coerce_results(self, raw: Any) -> list[dict[str, Any]]:
+        if raw is None:
+            return []
+        results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
+        if not results:
+            return []
+        return [item for item in results if isinstance(item, dict)]
+
+    def _source_identifier(self, item: dict[str, Any], url: str) -> str:
+        for key in ("source_id", "sourceId", "sourceID", "id"):
+            raw = item.get(key)
+            if raw is None:
+                continue
+            text = str(raw).strip()
+            if text:
+                return text
+        path = Path(urlparse(url).path)
+        return path.name or path.as_posix() or url
+
+    def _build_text_and_citations(self, results: list[dict[str, Any]]):
+        pieces = []
+        citations = []
+        cursor = 0
+        cid = 1
+        separator = "\n\n---\n\n"
+
+        for item in results:
+            url = str(item.get("url") or "")
+            if not url:
+                continue
+
+            title = str(item.get("title") or "")
+            if not title:
+                path = Path(urlparse(url).path)
+                title = path.name or url
+
+            excerpts = item.get("excerpts") or []
+            excerpt_parts = [ex.strip() for ex in excerpts if isinstance(ex, str) and ex.strip()]
+            if not excerpt_parts:
+                continue
+
+            combined_excerpt = "\n\n".join(excerpt_parts)
+            source_id = self._source_identifier(item, url)
+            block_body = combined_excerpt if not source_id else f"{source_id}\n\n{combined_excerpt}"
+
+            if pieces:
+                pieces.append(separator)
+                cursor += len(separator)
+
+            opening_tag = "<source>\n"
+            pieces.append(opening_tag)
+            cursor += len(opening_tag)
+
+            pieces.append(block_body)
+            cursor += len(block_body)
+
+            closing_tag = "\n</source>"
+            pieces.append(closing_tag)
+            cursor += len(closing_tag)
+
+            marker = f"[{cid}]"
+            start = cursor
+            pieces.append(marker)
+            cursor += len(marker)
+
+            citations.append(Citation(id=cid, title=title or url, url=url, start=start, end=cursor))
+            cid += 1
+
+        return "".join(pieces), citations
+
+    def __str__(self) -> str:
+        return str(self._value or "")
+
+    def _repr_html_(self) -> str:
+        return f"<pre>{self._value or ''}</pre>"
+
+    def get_citations(self) -> list[Citation]:
+        return self._citations
+
+
 class QdrantIndexEngine(Engine):
     _default_url = "http://localhost:6333"
     _default_api_key = SYMAI_CONFIG.get("INDEXING_ENGINE_API_KEY", None)
```
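To see concretely what the new `SearchResult` builds, here is a small sketch that feeds it hand-made result dicts shaped like the ones `_format_search_results` emits (assuming the package and its index dependencies are importable; the URLs and excerpts are made up). The citation `start`/`end` offsets index the `[n]` markers inside the formatted text:

```python
from symai.backend.engines.index.engine_qdrant import SearchResult

raw = {
    "results": [
        {"url": "file:///tmp/a.pdf", "title": "a#p1",
         "excerpts": ["First chunk."], "source_id": "a-1"},
        {"url": "file:///tmp/b.txt", "title": "b",
         "excerpts": ["Second chunk."], "source_id": "b-1"},
    ]
}

res = SearchResult(raw)
print(res.value)  # <source> blocks joined by "---", each followed by its [n] marker

for c in res.get_citations():
    # Offsets were accumulated while building the text, so they recover the marker.
    assert res.value[c.start:c.end] == f"[{c.id}]"
    print(c.id, c.title, c.url)
```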
```diff
--- symbolicai-1.3.0/symai/backend/engines/index/engine_qdrant.py
+++ symbolicai-1.5.0/symai/backend/engines/index/engine_qdrant.py
@@ -421,15 +525,18 @@ class QdrantIndexEngine(Engine):
             kwargs["index_get"] = True
             self._configure_collection(**kwargs)
 
+        treat_as_search_engine = False
         if operation == "search":
             # Ensure collection exists - fail fast if it doesn't
             self._ensure_collection_exists(collection_name)
-
+            search_kwargs = dict(kwargs)
+            index_top_k = search_kwargs.pop("index_top_k", self.index_top_k)
             # Optional search parameters
-            score_threshold =
+            score_threshold = search_kwargs.pop("score_threshold", None)
             # Accept both `query_filter` and `filter` for convenience
-            raw_filter =
+            raw_filter = search_kwargs.pop("query_filter", search_kwargs.pop("filter", None))
             query_filter = self._build_query_filter(raw_filter)
+            treat_as_search_engine = bool(search_kwargs.pop("treat_as_search_engine", False))
 
             # Use shared search helper that already handles retries and normalization
             rsp = self._search_sync(
@@ -438,6 +545,7 @@ class QdrantIndexEngine(Engine):
                 limit=index_top_k,
                 score_threshold=score_threshold,
                 query_filter=query_filter,
+                **search_kwargs,
             )
         elif operation == "add":
             # Create collection if it doesn't exist (only for write operations)
@@ -462,7 +570,10 @@ class QdrantIndexEngine(Engine):
 
         metadata = {}
 
-
+        if operation == "search" and treat_as_search_engine:
+            rsp = self._format_search_results(rsp, collection_name)
+        else:
+            rsp = QdrantResult(rsp, query, embedding)
         return [rsp], metadata
 
     def prepare(self, argument):
```
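The hunks above route everything through one `treat_as_search_engine` flag: the search kwargs are copied and popped one by one, and whatever survives flows to `_search_sync` untouched. A standalone trace of that pop sequence (engine internals elided; the key names are the real ones from the diff, the dict literal is hypothetical, and presumably the new `local_search` interface is what sets the flag):

```python
kwargs = {"index_top_k": 5, "score_threshold": 0.35,
          "treat_as_search_engine": True, "with_payload": True}

search_kwargs = dict(kwargs)  # copy so the original kwargs stay intact
index_top_k = search_kwargs.pop("index_top_k", 8)  # 8 stands in for self.index_top_k
score_threshold = search_kwargs.pop("score_threshold", None)
raw_filter = search_kwargs.pop("query_filter", search_kwargs.pop("filter", None))
treat_as_search_engine = bool(search_kwargs.pop("treat_as_search_engine", False))

print(index_top_k, score_threshold, raw_filter, treat_as_search_engine)
# -> 5 0.35 None True
print(search_kwargs)  # {'with_payload': True}: forwarded verbatim to the search helper
```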
```diff
--- symbolicai-1.3.0/symai/backend/engines/index/engine_qdrant.py
+++ symbolicai-1.5.0/symai/backend/engines/index/engine_qdrant.py
@@ -513,7 +624,33 @@ class QdrantIndexEngine(Engine):
             jitter=self.jitter,
         )
         def _func():
+            qdrant_kwargs = dict(kwargs)
             query_vector_normalized = self._normalize_vector(query_vector)
+            with_payload = qdrant_kwargs.pop("with_payload", True)
+            with_vectors = qdrant_kwargs.pop("with_vectors", self.index_values)
+            # qdrant-client `query_points` is strict about extra kwargs and will assert if any
+            # unknown arguments are provided. Because our engine `forward()` passes decorator
+            # kwargs through the stack, we must drop engine-internal fields here.
+            #
+            # Keep only kwargs that `qdrant_client.QdrantClient.query_points` accepts (besides
+            # those we pass explicitly).
+            if "filter" in qdrant_kwargs and "query_filter" not in qdrant_kwargs:
+                # Convenience alias supported by our public API
+                qdrant_kwargs["query_filter"] = qdrant_kwargs.pop("filter")
+
+            allowed_qdrant_kwargs = {
+                "using",
+                "prefetch",
+                "query_filter",
+                "search_params",
+                "offset",
+                "score_threshold",
+                "lookup_from",
+                "consistency",
+                "shard_key_selector",
+                "timeout",
+            }
+            qdrant_kwargs = {k: v for k, v in qdrant_kwargs.items() if k in allowed_qdrant_kwargs}
             # For single vector collections, pass vector directly to query parameter
             # For named vector collections, use Query(near_vector=NamedVector(name="vector_name", vector=...))
             # query_points API uses query_filter (not filter) for filtering
@@ -521,9 +658,9 @@ class QdrantIndexEngine(Engine):
                 collection_name=collection_name,
                 query=query_vector_normalized,
                 limit=top_k,
-                with_payload=
-                with_vectors=
-                **
+                with_payload=with_payload,
+                with_vectors=with_vectors,
+                **qdrant_kwargs,
             )
             # query_points returns QueryResponse with .points attribute, extract it
             return response.points
```
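The allow-list in this hunk mirrors the keyword parameters of `qdrant_client.QdrantClient.query_points`. A quick sketch to cross-check the set against whatever client version is installed (assuming `qdrant-client` is available; an empty set means every allow-listed name is accepted by this client):

```python
import inspect

from qdrant_client import QdrantClient

allowed = {
    "using", "prefetch", "query_filter", "search_params", "offset",
    "score_threshold", "lookup_from", "consistency", "shard_key_selector", "timeout",
}

# Parameters the installed client actually declares on query_points.
params = set(inspect.signature(QdrantClient.query_points).parameters)

# Any name in the engine's allow-list that this client version does not accept.
print(allowed - params)
```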
```diff
--- symbolicai-1.3.0/symai/backend/engines/index/engine_qdrant.py
+++ symbolicai-1.5.0/symai/backend/engines/index/engine_qdrant.py
@@ -860,6 +997,82 @@ class QdrantIndexEngine(Engine):
         # Use _query which handles retry logic and vector normalization
         return self._query(collection_name, query_vector, limit, **search_kwargs)
 
+    def _resolve_payload_url(
+        self, payload: dict[str, Any], collection_name: str, point_id: Any
+    ) -> str:
+        source = (
+            payload.get("source")
+            or payload.get("url")
+            or payload.get("file_path")
+            or payload.get("path")
+        )
+        if isinstance(source, str) and source:
+            if source.startswith(("http://", "https://", "file://")):
+                return source
+
+            source_path = Path(source).expanduser()
+            try:
+                resolved = source_path.resolve()
+                if resolved.exists() or source_path.is_absolute():
+                    return resolved.as_uri()
+            except Exception:
+                return str(source_path)
+            return str(source_path)
+
+        return f"qdrant://{collection_name}/{point_id}"
+
+    def _resolve_payload_title(self, payload: dict[str, Any], url: str, page: Any) -> str:
+        raw_title = payload.get("title")
+        if isinstance(raw_title, str) and raw_title.strip():
+            base = raw_title.strip()
+        else:
+            parsed = urlparse(url)
+            path_part = parsed.path or url
+            base = Path(path_part).stem or url
+
+        try:
+            page_int = int(page) if page is not None else None
+        except (TypeError, ValueError):
+            page_int = None
+
+        if Path(urlparse(url).path).suffix.lower() == ".pdf" and page_int is not None:
+            base = f"{base}#p{page_int}"
+
+        return base
+
+    def _format_search_results(self, points: list[ScoredPoint] | None, collection_name: str):
+        results: list[dict[str, Any]] = []
+
+        for point in points or []:
+            payload = getattr(point, "payload", {}) or {}
+            text = payload.get("text") or payload.get("content")
+            if isinstance(text, list):
+                text = " ".join([t for t in text if isinstance(t, str)])
+            if not isinstance(text, str):
+                continue
+            excerpt = text.strip()
+            if not excerpt:
+                continue
+
+            page = payload.get("page") or payload.get("page_number") or payload.get("pageIndex")
+            url = self._resolve_payload_url(payload, collection_name, getattr(point, "id", ""))
+            title = self._resolve_payload_title(payload, url, page)
+
+            results.append(
+                {
+                    "url": url,
+                    "title": title,
+                    "excerpts": [excerpt],
+                    "source_id": payload.get("source_id")
+                    or payload.get("sourceId")
+                    or payload.get("chunk_id")
+                    or payload.get("chunkId")
+                    or getattr(point, "id", None),
+                }
+            )
+
+        return SearchResult({"results": results})
+
     async def search(
         self,
         collection_name: str,
```
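`_resolve_payload_url` is a three-step fallback ladder; traced by hand with plain `pathlib` (the paths and collection are hypothetical):

```python
from pathlib import Path

# Step 1: values that already look like URLs/URIs pass through unchanged.
print("https://example.com/doc".startswith(("http://", "https://", "file://")))  # True

# Step 2: filesystem paths are expanded, resolved, and converted to file:// URIs.
print(Path("~/notes/paper.pdf").expanduser().resolve().as_uri())

# Step 3: with no usable source in the payload, a synthetic Qdrant URI is built.
collection_name, point_id = "my_collection", 42
print(f"qdrant://{collection_name}/{point_id}")
```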
```diff
--- symbolicai-1.3.0/symai/backend/engines/index/engine_qdrant.py
+++ symbolicai-1.5.0/symai/backend/engines/index/engine_qdrant.py
@@ -923,7 +1136,7 @@ class QdrantIndexEngine(Engine):
             if tmp_path.exists():
                 tmp_path.unlink()
 
-    async def chunk_and_upsert(
+    async def chunk_and_upsert(
         self,
         collection_name: str,
         text: str | Symbol | None = None,
@@ -1001,8 +1214,7 @@ class QdrantIndexEngine(Engine):
             # Add source to metadata if not already present
             if metadata is None:
                 metadata = {}
-
-            metadata["source"] = doc_path.name
+            metadata["source"] = str(doc_path.resolve())
 
         # Handle document_url: download and read file using FileReader
         elif document_url is not None:
```