symbolicai 1.3.0__tar.gz → 1.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. {symbolicai-1.3.0 → symbolicai-1.5.0}/AGENTS.md +1 -1
  2. {symbolicai-1.3.0 → symbolicai-1.5.0}/PKG-INFO +4 -1
  3. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/indexing_engine.md +50 -8
  4. symbolicai-1.5.0/docs/source/ENGINES/scrape_engine.md +143 -0
  5. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/search_engine.md +72 -0
  6. {symbolicai-1.3.0 → symbolicai-1.5.0}/pyproject.toml +4 -1
  7. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/__init__.py +1 -1
  8. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/index/engine_qdrant.py +222 -10
  9. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/scrape/engine_requests.py +39 -10
  10. symbolicai-1.5.0/symai/backend/engines/search/__init__.py +13 -0
  11. symbolicai-1.5.0/symai/backend/engines/search/engine_firecrawl.py +333 -0
  12. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_parallel.py +5 -5
  13. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/components.py +9 -3
  14. symbolicai-1.5.0/symai/extended/interfaces/firecrawl.py +30 -0
  15. symbolicai-1.5.0/symai/extended/interfaces/local_search.py +57 -0
  16. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/parallel.py +5 -5
  17. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/functional.py +3 -4
  18. {symbolicai-1.3.0 → symbolicai-1.5.0}/symbolicai.egg-info/PKG-INFO +4 -1
  19. {symbolicai-1.3.0 → symbolicai-1.5.0}/symbolicai.egg-info/SOURCES.txt +6 -1
  20. symbolicai-1.5.0/symbolicai.egg-info/dependency_links.txt +1 -0
  21. {symbolicai-1.3.0 → symbolicai-1.5.0}/symbolicai.egg-info/requires.txt +4 -0
  22. symbolicai-1.5.0/tests/data/symmetry_breaking.pdf +0 -0
  23. symbolicai-1.5.0/uv.lock +9197 -0
  24. symbolicai-1.3.0/docs/source/ENGINES/scrape_engine.md +0 -43
  25. symbolicai-1.3.0/symai/misc/__init__.py +0 -0
  26. symbolicai-1.3.0/uv.lock +0 -7673
  27. {symbolicai-1.3.0 → symbolicai-1.5.0}/.gitbook.yaml +0 -0
  28. {symbolicai-1.3.0 → symbolicai-1.5.0}/.github/FUNDING.yml +0 -0
  29. {symbolicai-1.3.0 → symbolicai-1.5.0}/.gitignore +0 -0
  30. {symbolicai-1.3.0 → symbolicai-1.5.0}/.symai/symsh.config.json +0 -0
  31. {symbolicai-1.3.0 → symbolicai-1.5.0}/CITATION.cff +0 -0
  32. {symbolicai-1.3.0 → symbolicai-1.5.0}/Dockerfile +0 -0
  33. {symbolicai-1.3.0 → symbolicai-1.5.0}/LICENSE +0 -0
  34. {symbolicai-1.3.0 → symbolicai-1.5.0}/MANIFEST.in +0 -0
  35. {symbolicai-1.3.0 → symbolicai-1.5.0}/README.md +0 -0
  36. {symbolicai-1.3.0 → symbolicai-1.5.0}/app.py +0 -0
  37. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/banner.png +0 -0
  38. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/cat.jpg +0 -0
  39. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/cat.png +0 -0
  40. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/contract_flow.png +0 -0
  41. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img1.png +0 -0
  42. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img10.png +0 -0
  43. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img2.png +0 -0
  44. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img3.png +0 -0
  45. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img4.png +0 -0
  46. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img5.png +0 -0
  47. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img6.png +0 -0
  48. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img7.png +0 -0
  49. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img8.png +0 -0
  50. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/img9.png +0 -0
  51. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/preview.gif +0 -0
  52. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/screen1.jpeg +0 -0
  53. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/symai_logo.png +0 -0
  54. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/symsh.png +0 -0
  55. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid1.png +0 -0
  56. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid2.png +0 -0
  57. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid3.png +0 -0
  58. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid4.png +0 -0
  59. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid5.png +0 -0
  60. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/images/vid6.png +0 -0
  61. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/results/news.html +0 -0
  62. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/results/news.png +0 -0
  63. {symbolicai-1.3.0 → symbolicai-1.5.0}/assets/results/news_prev.png +0 -0
  64. {symbolicai-1.3.0 → symbolicai-1.5.0}/bin/install.ps1 +0 -0
  65. {symbolicai-1.3.0 → symbolicai-1.5.0}/bin/install.sh +0 -0
  66. {symbolicai-1.3.0 → symbolicai-1.5.0}/build.py +0 -0
  67. {symbolicai-1.3.0 → symbolicai-1.5.0}/docker-compose.yml +0 -0
  68. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/clip_engine.md +0 -0
  69. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/custom_engine.md +0 -0
  70. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/drawing_engine.md +0 -0
  71. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/file_engine.md +0 -0
  72. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/local_engine.md +0 -0
  73. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/neurosymbolic_engine.md +0 -0
  74. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/ocr_engine.md +0 -0
  75. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/speech_to_text_engine.md +0 -0
  76. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/ENGINES/symbolic_engine.md +0 -0
  77. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/contracts.md +0 -0
  78. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/error_handling.md +0 -0
  79. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/expressions.md +0 -0
  80. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/import.md +0 -0
  81. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/operations.md +0 -0
  82. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/FEATURES/primitives.md +0 -0
  83. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/INSTALLATION.md +0 -0
  84. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/INTRODUCTION.md +0 -0
  85. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/QUICKSTART.md +0 -0
  86. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/SUMMARY.md +0 -0
  87. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TOOLS/chatbot.md +0 -0
  88. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TOOLS/packages.md +0 -0
  89. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TOOLS/shell.md +0 -0
  90. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/chatbot.md +0 -0
  91. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/context.md +0 -0
  92. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/data_query.md +0 -0
  93. {symbolicai-1.3.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/video_tutorials.md +0 -0
  94. {symbolicai-1.3.0 → symbolicai-1.5.0}/environment.yml +0 -0
  95. {symbolicai-1.3.0 → symbolicai-1.5.0}/examples/contracts.ipynb +0 -0
  96. {symbolicai-1.3.0 → symbolicai-1.5.0}/examples/primitives.ipynb +0 -0
  97. {symbolicai-1.3.0 → symbolicai-1.5.0}/icon_converter.py +0 -0
  98. {symbolicai-1.3.0 → symbolicai-1.5.0}/installer.py +0 -0
  99. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/Basics.ipynb +0 -0
  100. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/ChatBot.ipynb +0 -0
  101. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/Conversation.ipynb +0 -0
  102. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/Indexer.ipynb +0 -0
  103. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/News.ipynb +0 -0
  104. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/Queries.ipynb +0 -0
  105. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/TTS_Persona.ipynb +0 -0
  106. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/Lean engine.png +0 -0
  107. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/a_star.txt +0 -0
  108. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/abstract.py +0 -0
  109. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/audio.mp3 +0 -0
  110. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/dbpedia_samples.jsonl +0 -0
  111. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/dbpedia_samples_prepared_train.jsonl +0 -0
  112. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/dbpedia_samples_prepared_valid.jsonl +0 -0
  113. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/demo.py +0 -0
  114. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/demo_strategy.py +0 -0
  115. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/docs.py +0 -0
  116. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/einsteins_puzzle.txt +0 -0
  117. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/file.json +0 -0
  118. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/lean.py +0 -0
  119. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/news.py +0 -0
  120. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/paper.pdf +0 -0
  121. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/paper.py +0 -0
  122. {symbolicai-1.3.0 → symbolicai-1.5.0}/legacy/notebooks/examples/sql.py +0 -0
  123. {symbolicai-1.3.0 → symbolicai-1.5.0}/public/eai.svg +0 -0
  124. {symbolicai-1.3.0 → symbolicai-1.5.0}/pytest.ini +0 -0
  125. {symbolicai-1.3.0 → symbolicai-1.5.0}/ruff.toml +0 -0
  126. {symbolicai-1.3.0 → symbolicai-1.5.0}/setup.cfg +0 -0
  127. {symbolicai-1.3.0 → symbolicai-1.5.0}/setup.py +0 -0
  128. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/TERMS_OF_SERVICE.md +0 -0
  129. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/__init__.py +0 -0
  130. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/base.py +0 -0
  131. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/__init__.py +0 -0
  132. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/drawing/engine_bfl.py +0 -0
  133. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/drawing/engine_gpt_image.py +0 -0
  134. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/embedding/engine_llama_cpp.py +0 -0
  135. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/embedding/engine_openai.py +0 -0
  136. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/execute/engine_python.py +0 -0
  137. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/files/engine_io.py +0 -0
  138. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/imagecaptioning/engine_blip2.py +0 -0
  139. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +0 -0
  140. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/index/engine_pinecone.py +0 -0
  141. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/index/engine_vectordb.py +0 -0
  142. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/lean/engine_lean4.py +0 -0
  143. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/__init__.py +0 -0
  144. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +0 -0
  145. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +0 -0
  146. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_cerebras.py +0 -0
  147. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +0 -0
  148. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +0 -0
  149. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_groq.py +0 -0
  150. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_huggingface.py +0 -0
  151. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_llama_cpp.py +0 -0
  152. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +0 -0
  153. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +0 -0
  154. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_openai_responses.py +0 -0
  155. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/ocr/engine_apilayer.py +0 -0
  156. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/output/engine_stdout.py +0 -0
  157. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_openai.py +0 -0
  158. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_perplexity.py +0 -0
  159. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_serpapi.py +0 -0
  160. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/speech_to_text/engine_local_whisper.py +0 -0
  161. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/symbolic/engine_wolframalpha.py +0 -0
  162. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/text_to_speech/engine_openai.py +0 -0
  163. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/text_vision/engine_clip.py +0 -0
  164. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/engines/userinput/engine_console.py +0 -0
  165. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/__init__.py +0 -0
  166. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/anthropic.py +0 -0
  167. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/cerebras.py +0 -0
  168. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/deepseek.py +0 -0
  169. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/google.py +0 -0
  170. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/groq.py +0 -0
  171. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/mixin/openai.py +0 -0
  172. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/backend/settings.py +0 -0
  173. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/chat.py +0 -0
  174. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/collect/__init__.py +0 -0
  175. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/collect/dynamic.py +0 -0
  176. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/collect/pipeline.py +0 -0
  177. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/collect/stats.py +0 -0
  178. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/constraints.py +0 -0
  179. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/context.py +0 -0
  180. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/core.py +0 -0
  181. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/core_ext.py +0 -0
  182. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/endpoints/__init__py +0 -0
  183. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/endpoints/api.py +0 -0
  184. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/exceptions.py +0 -0
  185. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/.DS_Store +0 -0
  186. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/__init__.py +0 -0
  187. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/api_builder.py +0 -0
  188. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/arxiv_pdf_parser.py +0 -0
  189. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/bibtex_parser.py +0 -0
  190. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/conversation.py +0 -0
  191. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/document.py +0 -0
  192. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/file_merger.py +0 -0
  193. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/graph.py +0 -0
  194. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/html_style_template.py +0 -0
  195. {symbolicai-1.3.0/symai/server → symbolicai-1.5.0/symai/extended/interfaces}/__init__.py +0 -0
  196. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/blip_2.py +0 -0
  197. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/clip.py +0 -0
  198. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/console.py +0 -0
  199. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/dall_e.py +0 -0
  200. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/file.py +0 -0
  201. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/flux.py +0 -0
  202. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/gpt_image.py +0 -0
  203. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/input.py +0 -0
  204. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/llava.py +0 -0
  205. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/naive_scrape.py +0 -0
  206. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/naive_vectordb.py +0 -0
  207. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/ocr.py +0 -0
  208. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/openai_search.py +0 -0
  209. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/perplexity.py +0 -0
  210. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/pinecone.py +0 -0
  211. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/python.py +0 -0
  212. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/serpapi.py +0 -0
  213. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/terminal.py +0 -0
  214. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/tts.py +0 -0
  215. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/whisper.py +0 -0
  216. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/interfaces/wolframalpha.py +0 -0
  217. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/metrics/__init__.py +0 -0
  218. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/metrics/similarity.py +0 -0
  219. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/os_command.py +0 -0
  220. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/packages/__init__.py +0 -0
  221. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/packages/symdev.py +0 -0
  222. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/packages/sympkg.py +0 -0
  223. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/packages/symrun.py +0 -0
  224. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/repo_cloner.py +0 -0
  225. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/seo_query_optimizer.py +0 -0
  226. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/solver.py +0 -0
  227. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/summarizer.py +0 -0
  228. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/taypan_interpreter.py +0 -0
  229. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/extended/vectordb.py +0 -0
  230. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/formatter/__init__.py +0 -0
  231. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/formatter/emoji.pytxt +0 -0
  232. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/formatter/formatter.py +0 -0
  233. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/formatter/regex.py +0 -0
  234. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/imports.py +0 -0
  235. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/interfaces.py +0 -0
  236. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/memory.py +0 -0
  237. {symbolicai-1.3.0/symai/extended/interfaces → symbolicai-1.5.0/symai/menu}/__init__.py +0 -0
  238. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/menu/screen.py +0 -0
  239. {symbolicai-1.3.0/symai/menu → symbolicai-1.5.0/symai/misc}/__init__.py +0 -0
  240. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/misc/console.py +0 -0
  241. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/misc/loader.py +0 -0
  242. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/models/__init__.py +0 -0
  243. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/models/base.py +0 -0
  244. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/models/errors.py +0 -0
  245. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/ops/__init__.py +0 -0
  246. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/ops/measures.py +0 -0
  247. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/ops/primitives.py +0 -0
  248. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/post_processors.py +0 -0
  249. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/pre_processors.py +0 -0
  250. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/processor.py +0 -0
  251. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/prompts.py +0 -0
  252. /symbolicai-1.3.0/symbolicai.egg-info/dependency_links.txt → /symbolicai-1.5.0/symai/server/__init__.py +0 -0
  253. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/server/huggingface_server.py +0 -0
  254. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/server/llama_cpp_server.py +0 -0
  255. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/server/qdrant_server.py +0 -0
  256. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/shell.py +0 -0
  257. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/shellsv.py +0 -0
  258. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/strategy.py +0 -0
  259. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/symbol.py +0 -0
  260. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/symsh.md +0 -0
  261. {symbolicai-1.3.0 → symbolicai-1.5.0}/symai/utils.py +0 -0
  262. {symbolicai-1.3.0 → symbolicai-1.5.0}/symbolicai.egg-info/entry_points.txt +0 -0
  263. {symbolicai-1.3.0 → symbolicai-1.5.0}/symbolicai.egg-info/top_level.txt +0 -0
  264. {symbolicai-1.3.0 → symbolicai-1.5.0}/tests/README.md +0 -0
  265. {symbolicai-1.3.0 → symbolicai-1.5.0}/tests/data/audio.mp3 +0 -0
  266. {symbolicai-1.3.0 → symbolicai-1.5.0}/tests/data/pg1727.txt +0 -0
  267. {symbolicai-1.3.0 → symbolicai-1.5.0}/trusted_repos.yml +0 -0
@@ -66,7 +66,7 @@ CLI entrypoints (after install): `symchat`, `symsh`, `symconfig`, `symserver`.
66
66
  - Treat type hints as contracts; do not add runtime type checks except at trust boundaries (CLI/env, JSON/network, disk).
67
67
  - Prefer minimal diffs; edit existing code over adding new files unless necessary.
68
68
  - Do not add/modify `tests/` or run tests unless explicitly requested; if requested, run the narrowest relevant `pytest` command.
69
- - When you change Python files: run `ruff check <changed_files> --output-format concise --config ruff.toml` and fix issues.
69
+ - When you change Python files outside `tests/`: run `ruff check <changed_files> --output-format concise --config ruff.toml` and fix issues.
70
70
  - Keep search local-first (`rg`); follow imports instead of repo-wide “random scanning”.
71
71
  - If adding a regex, include a short comment explaining what it matches.
72
72
  - Update `TODO.md` when tasks are completed, added, or re-scoped.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: symbolicai
3
- Version: 1.3.0
3
+ Version: 1.5.0
4
4
  Summary: A Neurosymbolic Perspective on Large Language Models
5
5
  Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
6
6
  License: BSD 3-Clause License
@@ -113,6 +113,7 @@ Requires-Dist: openai-whisper>=20240930; extra == "whisper"
113
113
  Requires-Dist: numba>=0.62.1; extra == "whisper"
114
114
  Requires-Dist: llvmlite>=0.45.1; extra == "whisper"
115
115
  Provides-Extra: search
116
+ Requires-Dist: firecrawl-py>=4.12.0; extra == "search"
116
117
  Requires-Dist: parallel-web>=0.3.3; extra == "search"
117
118
  Provides-Extra: serpapi
118
119
  Requires-Dist: google_search_results>=2.4.2; extra == "serpapi"
@@ -136,6 +137,8 @@ Requires-Dist: symbolicai[serpapi]; extra == "all"
136
137
  Requires-Dist: symbolicai[services]; extra == "all"
137
138
  Requires-Dist: symbolicai[solver]; extra == "all"
138
139
  Requires-Dist: symbolicai[qdrant]; extra == "all"
140
+ Provides-Extra: dev
141
+ Requires-Dist: pytest-asyncio>=1.3.0; extra == "dev"
139
142
  Dynamic: license-file
140
143
 
141
144
  # **SymbolicAI: A neuro-symbolic perspective on LLMs**
@@ -31,19 +31,22 @@ The Qdrant engine provides a production-ready vector database for scalable RAG a
31
31
 
32
32
  ### Setup
33
33
 
34
- #### Option 1: Local Qdrant Server
34
+ #### Option 1: Local Qdrant Server (via symserver)
35
35
 
36
- Start a local Qdrant server using the built-in wrapper:
36
+ Start Qdrant using the `symserver` CLI (Docker by default).
37
37
 
38
38
  ```bash
39
- # Using Docker (default)
40
- python -m symai.server.qdrant_server
39
+ # Pull the image once (recommended)
40
+ docker pull qdrant/qdrant:latest
41
41
 
42
- # Using Qdrant binary
43
- python -m symai.server.qdrant_server --mode binary --binary-path /path/to/qdrant
42
+ # Docker (default): set INDEXING_ENGINE so symserver selects Qdrant
43
+ INDEXING_ENGINE=qdrant symserver --host 0.0.0.0 --port 6333 --storage-path ./qdrant_storage
44
44
 
45
- # Custom configuration
46
- python -m symai.server.qdrant_server --host 0.0.0.0 --port 6333 --storage-path ./qdrant_storage
45
+ # Use native binary
46
+ INDEXING_ENGINE=qdrant symserver --env binary --binary-path /path/to/qdrant --port 6333 --storage-path ./qdrant_storage
47
+
48
+ # Detach Docker if desired
49
+ INDEXING_ENGINE=qdrant symserver --docker-detach
47
50
  ```
48
51
 
49
52
  #### Option 2: Cloud Qdrant
@@ -103,6 +106,43 @@ async def basic_usage():
103
106
  asyncio.run(basic_usage())
104
107
  ```
105
108
 
109
+ ### Local Search with citations
110
+
111
+ If you need citation-formatted results compatible with `parallel.search`, use the `local_search` interface. It embeds the query locally, queries Qdrant, and returns a `SearchResult` (with `value` and `citations`) instead of raw `ScoredPoint` objects:
112
+
113
+ Local search accepts the same args as passed to Qdrant directly: `collection_name`/`index_name`, `limit`/`top_k`/`index_top_k`, `score_threshold`, `query_filter` (dict or Qdrant `Filter`), and any extra Qdrant search kwargs. Citation fields are derived from Qdrant payloads: the excerpt uses `payload["text"]` (or `content`), the URL is resolved from `payload["source"]`/`url`/`file_path`/`path` and is always returned as an absolute `file://` URI (relative inputs resolve against the current working directory), and the title is the stem of that path (PDF pages append `#p{page}` when provided). Each matching chunk yields its own citation; multiple citations can point to the same file.
114
+
115
+ If you want a stable source header for each chunk, store a `source_id` or `chunk_id` in the payload (otherwise the Qdrant point id is used).
116
+
117
+ Example:
118
+
119
+ ```python
120
+ from symai.interfaces import Interface
121
+ from qdrant_client.http import models
122
+
123
+ search = Interface("local_search", index_name="my_collection")
124
+
125
+ qdrant_filter = models.Filter(
126
+ must=[
127
+ models.FieldCondition(key="category", match=models.MatchValue(value="AI"))
128
+ ]
129
+ )
130
+
131
+ result = search.search(
132
+ "neural networks and transformers",
133
+ collection_name="my_collection", # alias: index_name
134
+ limit=5, # aliases: top_k, index_top_k
135
+ score_threshold=0.35,
136
+ query_filter=qdrant_filter, # or a simple dict like {"category": "AI"}
137
+ with_payload=True, # passed through to Qdrant query_points
138
+ with_vectors=False, # optional; defaults follow engine config
139
+ # any other Qdrant query_points kwargs can be added here
140
+ )
141
+
142
+ print(result.value) # formatted text with [1], [2] markers
143
+ print(result.get_citations()) # list of Citation objects
144
+ ```
145
+
106
146
  ### Collection Management
107
147
 
108
148
  Create and manage collections programmatically:
@@ -156,6 +196,8 @@ async def add_documents():
156
196
  document_path="/path/to/document.pdf",
157
197
  metadata={"source": "document.pdf"}
158
198
  )
199
+ # Note: document_path indexing stores the absolute file path in payload["source"]
200
+ # so local_search citations resolve to file:// URIs.
159
201
 
160
202
  # Chunk and index from a URL
161
203
  num_chunks = await engine.chunk_and_upsert(
@@ -0,0 +1,143 @@
1
+ # Scrape Engine
2
+
3
+ ## Naive Scrape
4
+
5
+ To access data from the web, we can use the `naive_scrape` interface. The engine underneath is very lightweight and can be used to scrape data from websites. It is based on the `requests` library, as well as `trafilatura` for output formatting, and `bs4` for HTML parsing. `trafilatura` currently supports the following output formats: `json`, `csv`, `html`, `markdown`, `text`, `xml`
6
+
7
+ ```python
8
+ from symai.interfaces import Interface
9
+
10
+ scraper = Interface("naive_scrape")
11
+ url = "https://docs.astral.sh/uv/guides/scripts/#next-steps"
12
+ res = scraper(url)
13
+ ```
14
+
15
+ ## Parallel (Parallel.ai)
16
+
17
+ The Parallel.ai integration routes scrape calls through the official `parallel-web` SDK and can handle PDFs, JavaScript-heavy feeds, and standard HTML pages in the same workflow. Instantiate the Parallel interface and call `.scrape(...)` with the target URL. The engine detects scrape requests automatically whenever a URL is supplied.
18
+
19
+ ```python
20
+ from symai.extended import Interface
21
+
22
+ scraper = Interface("parallel")
23
+ article = scraper.scrape(
24
+ "https://trafilatura.readthedocs.io/en/latest/crawls.html",
25
+ full_content=True, # optional: request full document text
26
+ excerpts=True, # optional: default True, retain excerpt snippets
27
+ objective="Summarize crawl guidance for internal notes."
28
+ )
29
+ print(str(article))
30
+ ```
31
+
32
+ Configuration requires a Parallel API key and the Parallel model token. Add the following to your settings:
33
+
34
+ ```bash
35
+ {
36
+
37
+ "SEARCH_ENGINE_API_KEY": "…",
38
+ "SEARCH_ENGINE_MODEL": "parallel"
39
+
40
+ }
41
+ ```
42
+
43
+ When invoked with a URL, the engine hits Parallel's Extract API and returns an `ExtractResult`. The result string joins excerpts or the full content if requested. Because processing is offloaded to Parallel's hosted infrastructure, the engine remains reliable on dynamic pages that the naive scraper cannot render. Install the dependency with `pip install parallel-web` before enabling this engine.
44
+
45
+ ## Firecrawl
46
+
47
+ Firecrawl.dev specializes in reliable web scraping with automatic handling of JavaScript-rendered content, proxies, and anti-bot mechanisms. It converts web pages into clean formats suitable for LLM consumption and supports advanced features like actions, caching, and location-based scraping.
48
+
49
+ ### Examples
50
+
51
+ ```python
52
+ from symai.extended import Interface
53
+
54
+ scraper = Interface("firecrawl")
55
+
56
+ # Example 1: Basic webpage scraping
57
+ content = scraper.scrape(
58
+ "https://docs.firecrawl.dev/introduction",
59
+ formats=["markdown"]
60
+ )
61
+ print(content)
62
+
63
+ # Example 2: PDF scraping with content extraction and trimming
64
+ pdf_full = scraper.scrape(
65
+ "https://pmc.ncbi.nlm.nih.gov/articles/PMC7231600",
66
+ only_main_content=True,
67
+ formats=["markdown"],
68
+ proxy="auto"
69
+ )
70
+ # Trim locally if needed
71
+ pdf_trimmed = str(pdf_full)[:100]
72
+
73
+ # Note: JS-heavy sites like Twitter/LinkedIn are currently not fully supported
74
+ # They typically return 403 Forbidden errors (may vary by subscription tier)
75
+ ```
76
+
77
+ ### Configuration
78
+
79
+ Enable the engine by configuring Firecrawl credentials:
80
+
81
+ ```bash
82
+ {
83
+ "SEARCH_ENGINE_API_KEY": "fc-your-api-key",
84
+ "SEARCH_ENGINE_MODEL": "firecrawl"
85
+ }
86
+ ```
87
+
88
+ ### JSON Schema Extraction
89
+
90
+ Firecrawl supports structured data extraction using JSON schemas. This is useful for extracting specific fields from web pages using LLM-powered extraction:
91
+
92
+ ```python
93
+ from pydantic import Field
94
+ from symai.extended import Interface
95
+ from symai.models import LLMDataModel
96
+
97
+ class MetadataModel(LLMDataModel):
98
+ """Bibliographic metadata extracted from a source document."""
99
+ title: str = Field(description="Title of the source.")
100
+ year: str = Field(description="Publication year (4 digits) or Unknown.")
101
+ authors: list[str] = Field(default_factory=list, description="List of authors.")
102
+ doi: str | None = Field(default=None, description="DOI if available.")
103
+
104
+ # Build JSON format config from Pydantic schema
105
+ schema = MetadataModel.model_json_schema()
106
+ json_format = {
107
+ "type": "json",
108
+ "prompt": "Extract bibliographic metadata from this academic paper.",
109
+ "schema": schema,
110
+ }
111
+
112
+ scraper = Interface("firecrawl")
113
+ result = scraper.scrape(
114
+ "https://journals.physiology.org/doi/full/10.1152/ajpregu.00051.2002",
115
+ formats=[json_format],
116
+ proxy="auto"
117
+ )
118
+
119
+ # Access extracted data as dict
120
+ extracted = result.raw["json"]
121
+ metadata = MetadataModel(**extracted)
122
+ print(metadata.model_dump())
123
+
124
+ # Or as JSON string
125
+ print(str(result))
126
+ ```
127
+
128
+ ### Supported Parameters
129
+
130
+ The engine supports many parameters (passed as kwargs). Common ones include:
131
+ - **formats**: Output formats (["markdown"], ["html"], ["rawHtml"])
132
+ - **only_main_content**: Extract main content only (boolean)
133
+ - **proxy**: Proxy mode ("basic", "stealth", "auto")
134
+ - **location**: Geographic location object with optional country and languages
135
+ - Example: `{"country": "US"}` or `{"country": "RO", "languages": ["ro"]}`
136
+ - **maxAge**: Cache duration in seconds (integer)
137
+ - **storeInCache**: Enable caching (boolean)
138
+ - **actions**: Page interactions before scraping (list of action objects)
139
+ - Example: `[{"type": "wait", "milliseconds": 2000}]`
140
+ - Example: `[{"type": "click", "selector": ".button"}]`
141
+ - Example: `[{"type": "scroll", "direction": "down", "amount": 500}]`
142
+
143
+ Check the Firecrawl v2 API documentation for the complete list of available parameters and action types.
@@ -152,3 +152,75 @@ Here's how to configure the OpenAI search engine:
152
152
  ```
153
153
 
154
154
  This engine calls the OpenAI Responses API under the hood. When you target a reasoning-capable model, pass a `reasoning` dictionary matching the Responses payload schema (for example `{"effort": "low", "summary": "auto"}`). If omitted, the engine falls back to the default effort/summary settings shown above.
155
+
156
+ ## Firecrawl
157
+ Firecrawl.dev provides web scraping and search capabilities with built-in handling of dynamic JavaScript content and anti-bot mechanisms. The engine converts web pages into clean markdown and can perform web searches across multiple sources with advanced filtering and content extraction.
158
+
159
+ ### Comprehensive Search Example
160
+
161
+ ```python
162
+ from symai.extended import Interface
163
+
164
+ engine = Interface("firecrawl")
165
+
166
+ # Example 1: Location-aware search with language, scraping, and citations
167
+ result = engine.search(
168
+ "who is nicusor dan",
169
+ limit=5,
170
+ location="Romania",
171
+ lang="ro",
172
+ sources=["web"],
173
+ formats=["markdown"],
174
+ only_main_content=True,
175
+ proxy="stealth"
176
+ )
177
+
178
+ # Access structured citations (similar to parallel.ai)
179
+ citations = result.get_citations()
180
+ for citation in citations:
181
+ print(f"[{citation.id}] {citation.title}: {citation.url}")
182
+
183
+ # Example 2: Domain-filtered search with character limits
184
+ domains = ["arxiv.org", "nature.com"]
185
+ filters = " OR ".join(f"site:{domain}" for domain in domains)
186
+ query = f"({filters}) what is thermodynamic computing"
187
+
188
+ result = engine.search(
189
+ query,
190
+ limit=10,
191
+ max_chars_per_result=500,
192
+ categories=["research"],
193
+ formats=["markdown"],
194
+ proxy="basic"
195
+ )
196
+ print(result)
197
+ ```
198
+
199
+ ### Configuration
200
+
201
+ Enable the engine by configuring Firecrawl credentials:
202
+
203
+ ```json
204
+ {
205
+ "SEARCH_ENGINE_API_KEY": "fc-your-api-key",
206
+ "SEARCH_ENGINE_MODEL": "firecrawl"
207
+ }
208
+ ```
209
+
210
+ ### Supported Parameters
211
+
212
+ The engine supports many parameters (passed as kwargs). Common ones include:
213
+ - **limit**: Max number of results
214
+ - **location**: Country name string for search (e.g., "Romania", "Germany")
215
+ - **lang**: Language code string for search (e.g., "ro", "es") - hint, not enforcement
216
+ - **sources**: List of sources (["web"], ["news"], ["images"])
217
+ - **categories**: Content types (["research"], ["github"], ["pdf"])
218
+ - **tbs**: Time-based filter (e.g., "qdr:d" for past day)
219
+ - **formats**: Output formats for scraped content (["markdown"], ["html"])
220
+ - **only_main_content**: Extract main content only when scraping (boolean)
221
+ - **max_chars_per_result**: Truncate results locally (integer)
222
+ - **proxy**: Proxy mode for scraping ("basic", "stealth", "auto")
223
+ - **scrape_location**: Location object for scraping with optional country and languages
224
+ - Example: `{"country": "US"}` or `{"country": "RO", "languages": ["ro"]}`
225
+
226
+ Check the Firecrawl v2 API documentation for the complete list of available parameters.
@@ -78,7 +78,7 @@ scrape = ["beautifulsoup4>=4.12.3", "trafilatura>=2.0.0", "pdfminer.six",
78
78
  llama_cpp = ["llama-cpp-python[server]>=0.3.7"] # handle separately since this dependency may not compile and require special maintenance
79
79
  wolframalpha = ["wolframalpha>=5.0.0"]
80
80
  whisper = ["openai-whisper>=20240930", "numba>=0.62.1", "llvmlite>=0.45.1"]
81
- search = ["parallel-web>=0.3.3"]
81
+ search = ["firecrawl-py>=4.12.0", "parallel-web>=0.3.3"]
82
82
  serpapi = ["google_search_results>=2.4.2"]
83
83
  services = ["fastapi>=0.110.0", "redis>=5.0.2", "uvicorn>=0.27.1"]
84
84
  solver = ["z3-solver>=4.12.6.0"]
@@ -94,6 +94,9 @@ all = [
94
94
  "symbolicai[solver]",
95
95
  "symbolicai[qdrant]"
96
96
  ]
97
+ dev = [
98
+ "pytest-asyncio>=1.3.0",
99
+ ]
97
100
 
98
101
  [tool.setuptools.dynamic]
99
102
  version = {attr = "symai.SYMAI_VERSION"}
@@ -33,7 +33,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
33
33
  # Create singleton instance
34
34
  config_manager = settings.SymAIConfig()
35
35
 
36
- SYMAI_VERSION = "1.3.0"
36
+ SYMAI_VERSION = "1.5.0"
37
37
  __version__ = SYMAI_VERSION
38
38
  __root_dir__ = config_manager.config_dir
39
39
 
@@ -4,8 +4,10 @@ import tempfile
4
4
  import urllib.request
5
5
  import uuid
6
6
  import warnings
7
+ from dataclasses import dataclass
7
8
  from pathlib import Path
8
9
  from typing import Any
10
+ from urllib.parse import urlparse
9
11
 
10
12
  import numpy as np
11
13
 
@@ -148,6 +150,108 @@ Matches:
148
150
  return f"<ul>{doc_str}</ul>"
149
151
 
150
152
 
153
+ @dataclass
154
+ class Citation:
155
+ id: int
156
+ title: str
157
+ url: str
158
+ start: int
159
+ end: int
160
+
161
+ def __hash__(self):
162
+ return hash((self.url,))
163
+
164
+
165
+ class SearchResult(Result):
166
+ def __init__(self, value: dict[str, Any] | Any, **kwargs) -> None:
167
+ super().__init__(value, **kwargs)
168
+ if isinstance(value, dict) and value.get("error"):
169
+ UserMessage(value["error"], raise_with=ValueError)
170
+ results = self._coerce_results(value)
171
+ text, citations = self._build_text_and_citations(results)
172
+ self._value = text
173
+ self._citations = citations
174
+
175
+ def _coerce_results(self, raw: Any) -> list[dict[str, Any]]:
176
+ if raw is None:
177
+ return []
178
+ results = raw.get("results", []) if isinstance(raw, dict) else getattr(raw, "results", None)
179
+ if not results:
180
+ return []
181
+ return [item for item in results if isinstance(item, dict)]
182
+
183
+ def _source_identifier(self, item: dict[str, Any], url: str) -> str:
184
+ for key in ("source_id", "sourceId", "sourceID", "id"):
185
+ raw = item.get(key)
186
+ if raw is None:
187
+ continue
188
+ text = str(raw).strip()
189
+ if text:
190
+ return text
191
+ path = Path(urlparse(url).path)
192
+ return path.name or path.as_posix() or url
193
+
194
+ def _build_text_and_citations(self, results: list[dict[str, Any]]):
195
+ pieces = []
196
+ citations = []
197
+ cursor = 0
198
+ cid = 1
199
+ separator = "\n\n---\n\n"
200
+
201
+ for item in results:
202
+ url = str(item.get("url") or "")
203
+ if not url:
204
+ continue
205
+
206
+ title = str(item.get("title") or "")
207
+ if not title:
208
+ path = Path(urlparse(url).path)
209
+ title = path.name or url
210
+
211
+ excerpts = item.get("excerpts") or []
212
+ excerpt_parts = [ex.strip() for ex in excerpts if isinstance(ex, str) and ex.strip()]
213
+ if not excerpt_parts:
214
+ continue
215
+
216
+ combined_excerpt = "\n\n".join(excerpt_parts)
217
+ source_id = self._source_identifier(item, url)
218
+ block_body = combined_excerpt if not source_id else f"{source_id}\n\n{combined_excerpt}"
219
+
220
+ if pieces:
221
+ pieces.append(separator)
222
+ cursor += len(separator)
223
+
224
+ opening_tag = "<source>\n"
225
+ pieces.append(opening_tag)
226
+ cursor += len(opening_tag)
227
+
228
+ pieces.append(block_body)
229
+ cursor += len(block_body)
230
+
231
+ closing_tag = "\n</source>"
232
+ pieces.append(closing_tag)
233
+ cursor += len(closing_tag)
234
+
235
+ marker = f"[{cid}]"
236
+ start = cursor
237
+ pieces.append(marker)
238
+ cursor += len(marker)
239
+
240
+ citations.append(Citation(id=cid, title=title or url, url=url, start=start, end=cursor))
241
+ cid += 1
242
+
243
+ return "".join(pieces), citations
244
+
245
+ def __str__(self) -> str:
246
+ return str(self._value or "")
247
+
248
+ def _repr_html_(self) -> str:
249
+ return f"<pre>{self._value or ''}</pre>"
250
+
251
+ def get_citations(self) -> list[Citation]:
252
+ return self._citations
253
+
254
+
151
255
  class QdrantIndexEngine(Engine):
152
256
  _default_url = "http://localhost:6333"
153
257
  _default_api_key = SYMAI_CONFIG.get("INDEXING_ENGINE_API_KEY", None)
@@ -421,15 +525,18 @@ class QdrantIndexEngine(Engine):
421
525
  kwargs["index_get"] = True
422
526
  self._configure_collection(**kwargs)
423
527
 
528
+ treat_as_search_engine = False
424
529
  if operation == "search":
425
530
  # Ensure collection exists - fail fast if it doesn't
426
531
  self._ensure_collection_exists(collection_name)
427
- index_top_k = kwargs.get("index_top_k", self.index_top_k)
532
+ search_kwargs = dict(kwargs)
533
+ index_top_k = search_kwargs.pop("index_top_k", self.index_top_k)
428
534
  # Optional search parameters
429
- score_threshold = kwargs.get("score_threshold")
535
+ score_threshold = search_kwargs.pop("score_threshold", None)
430
536
  # Accept both `query_filter` and `filter` for convenience
431
- raw_filter = kwargs.get("query_filter", kwargs.get("filter"))
537
+ raw_filter = search_kwargs.pop("query_filter", search_kwargs.pop("filter", None))
432
538
  query_filter = self._build_query_filter(raw_filter)
539
+ treat_as_search_engine = bool(search_kwargs.pop("treat_as_search_engine", False))
433
540
 
434
541
  # Use shared search helper that already handles retries and normalization
435
542
  rsp = self._search_sync(
@@ -438,6 +545,7 @@ class QdrantIndexEngine(Engine):
438
545
  limit=index_top_k,
439
546
  score_threshold=score_threshold,
440
547
  query_filter=query_filter,
548
+ **search_kwargs,
441
549
  )
442
550
  elif operation == "add":
443
551
  # Create collection if it doesn't exist (only for write operations)
@@ -462,7 +570,10 @@ class QdrantIndexEngine(Engine):
462
570
 
463
571
  metadata = {}
464
572
 
465
- rsp = QdrantResult(rsp, query, embedding)
573
+ if operation == "search" and treat_as_search_engine:
574
+ rsp = self._format_search_results(rsp, collection_name)
575
+ else:
576
+ rsp = QdrantResult(rsp, query, embedding)
466
577
  return [rsp], metadata
467
578
 
468
579
  def prepare(self, argument):
@@ -513,7 +624,33 @@ class QdrantIndexEngine(Engine):
513
624
  jitter=self.jitter,
514
625
  )
515
626
  def _func():
627
+ qdrant_kwargs = dict(kwargs)
516
628
  query_vector_normalized = self._normalize_vector(query_vector)
629
+ with_payload = qdrant_kwargs.pop("with_payload", True)
630
+ with_vectors = qdrant_kwargs.pop("with_vectors", self.index_values)
631
+ # qdrant-client `query_points` is strict about extra kwargs and will assert if any
632
+ # unknown arguments are provided. Because our engine `forward()` passes decorator
633
+ # kwargs through the stack, we must drop engine-internal fields here.
634
+ #
635
+ # Keep only kwargs that `qdrant_client.QdrantClient.query_points` accepts (besides
636
+ # those we pass explicitly).
637
+ if "filter" in qdrant_kwargs and "query_filter" not in qdrant_kwargs:
638
+ # Convenience alias supported by our public API
639
+ qdrant_kwargs["query_filter"] = qdrant_kwargs.pop("filter")
640
+
641
+ allowed_qdrant_kwargs = {
642
+ "using",
643
+ "prefetch",
644
+ "query_filter",
645
+ "search_params",
646
+ "offset",
647
+ "score_threshold",
648
+ "lookup_from",
649
+ "consistency",
650
+ "shard_key_selector",
651
+ "timeout",
652
+ }
653
+ qdrant_kwargs = {k: v for k, v in qdrant_kwargs.items() if k in allowed_qdrant_kwargs}
517
654
  # For single vector collections, pass vector directly to query parameter
518
655
  # For named vector collections, use Query(near_vector=NamedVector(name="vector_name", vector=...))
519
656
  # query_points API uses query_filter (not filter) for filtering
@@ -521,9 +658,9 @@ class QdrantIndexEngine(Engine):
521
658
  collection_name=collection_name,
522
659
  query=query_vector_normalized,
523
660
  limit=top_k,
524
- with_payload=True,
525
- with_vectors=self.index_values,
526
- **kwargs,
661
+ with_payload=with_payload,
662
+ with_vectors=with_vectors,
663
+ **qdrant_kwargs,
527
664
  )
528
665
  # query_points returns QueryResponse with .points attribute, extract it
529
666
  return response.points
@@ -860,6 +997,82 @@ class QdrantIndexEngine(Engine):
860
997
  # Use _query which handles retry logic and vector normalization
861
998
  return self._query(collection_name, query_vector, limit, **search_kwargs)
862
999
 
1000
+ def _resolve_payload_url(
1001
+ self, payload: dict[str, Any], collection_name: str, point_id: Any
1002
+ ) -> str:
1003
+ source = (
1004
+ payload.get("source")
1005
+ or payload.get("url")
1006
+ or payload.get("file_path")
1007
+ or payload.get("path")
1008
+ )
1009
+ if isinstance(source, str) and source:
1010
+ if source.startswith(("http://", "https://", "file://")):
1011
+ return source
1012
+
1013
+ source_path = Path(source).expanduser()
1014
+ try:
1015
+ resolved = source_path.resolve()
1016
+ if resolved.exists() or source_path.is_absolute():
1017
+ return resolved.as_uri()
1018
+ except Exception:
1019
+ return str(source_path)
1020
+ return str(source_path)
1021
+
1022
+ return f"qdrant://{collection_name}/{point_id}"
1023
+
1024
+ def _resolve_payload_title(self, payload: dict[str, Any], url: str, page: Any) -> str:
1025
+ raw_title = payload.get("title")
1026
+ if isinstance(raw_title, str) and raw_title.strip():
1027
+ base = raw_title.strip()
1028
+ else:
1029
+ parsed = urlparse(url)
1030
+ path_part = parsed.path or url
1031
+ base = Path(path_part).stem or url
1032
+
1033
+ try:
1034
+ page_int = int(page) if page is not None else None
1035
+ except (TypeError, ValueError):
1036
+ page_int = None
1037
+
1038
+ if Path(urlparse(url).path).suffix.lower() == ".pdf" and page_int is not None:
1039
+ base = f"{base}#p{page_int}"
1040
+
1041
+ return base
1042
+
1043
+ def _format_search_results(self, points: list[ScoredPoint] | None, collection_name: str):
1044
+ results: list[dict[str, Any]] = []
1045
+
1046
+ for point in points or []:
1047
+ payload = getattr(point, "payload", {}) or {}
1048
+ text = payload.get("text") or payload.get("content")
1049
+ if isinstance(text, list):
1050
+ text = " ".join([t for t in text if isinstance(t, str)])
1051
+ if not isinstance(text, str):
1052
+ continue
1053
+ excerpt = text.strip()
1054
+ if not excerpt:
1055
+ continue
1056
+
1057
+ page = payload.get("page") or payload.get("page_number") or payload.get("pageIndex")
1058
+ url = self._resolve_payload_url(payload, collection_name, getattr(point, "id", ""))
1059
+ title = self._resolve_payload_title(payload, url, page)
1060
+
1061
+ results.append(
1062
+ {
1063
+ "url": url,
1064
+ "title": title,
1065
+ "excerpts": [excerpt],
1066
+ "source_id": payload.get("source_id")
1067
+ or payload.get("sourceId")
1068
+ or payload.get("chunk_id")
1069
+ or payload.get("chunkId")
1070
+ or getattr(point, "id", None),
1071
+ }
1072
+ )
1073
+
1074
+ return SearchResult({"results": results})
1075
+
863
1076
  async def search(
864
1077
  self,
865
1078
  collection_name: str,
@@ -923,7 +1136,7 @@ class QdrantIndexEngine(Engine):
923
1136
  if tmp_path.exists():
924
1137
  tmp_path.unlink()
925
1138
 
926
- async def chunk_and_upsert( # noqa: C901
1139
+ async def chunk_and_upsert(
927
1140
  self,
928
1141
  collection_name: str,
929
1142
  text: str | Symbol | None = None,
@@ -1001,8 +1214,7 @@ class QdrantIndexEngine(Engine):
1001
1214
  # Add source to metadata if not already present
1002
1215
  if metadata is None:
1003
1216
  metadata = {}
1004
- if "source" not in metadata:
1005
- metadata["source"] = doc_path.name
1217
+ metadata["source"] = str(doc_path.resolve())
1006
1218
 
1007
1219
  # Handle document_url: download and read file using FileReader
1008
1220
  elif document_url is not None: