symbolicai 0.19.0__tar.gz → 0.20.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. {symbolicai-0.19.0/docs/source → symbolicai-0.20.1}/LICENSE +9 -13
  2. {symbolicai-0.19.0 → symbolicai-0.20.1}/PKG-INFO +35 -2
  3. {symbolicai-0.19.0 → symbolicai-0.20.1}/README.md +1 -1
  4. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/search_engine.md +1 -1
  5. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/SUMMARY.md +0 -3
  6. {symbolicai-0.19.0 → symbolicai-0.20.1}/pyproject.toml +3 -2
  7. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/__init__.py +1 -1
  8. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +2 -1
  9. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +2 -1
  10. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/search/engine_openai.py +69 -5
  11. symbolicai-0.20.1/symai/backend/engines/webscraping/engine_requests.py +323 -0
  12. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/mixin/anthropic.py +7 -1
  13. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/naive_webscraping.py +4 -2
  14. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/menu/screen.py +9 -6
  15. {symbolicai-0.19.0 → symbolicai-0.20.1}/symbolicai.egg-info/PKG-INFO +35 -2
  16. {symbolicai-0.19.0 → symbolicai-0.20.1}/symbolicai.egg-info/SOURCES.txt +1 -1
  17. {symbolicai-0.19.0 → symbolicai-0.20.1}/symbolicai.egg-info/requires.txt +2 -0
  18. {symbolicai-0.19.0 → symbolicai-0.20.1}/tests/engines/search/openai_engine.py +27 -3
  19. {symbolicai-0.19.0 → symbolicai-0.20.1}/uv.lock +90 -3
  20. symbolicai-0.19.0/symai/backend/engines/webscraping/engine_requests.py +0 -128
  21. {symbolicai-0.19.0 → symbolicai-0.20.1}/.gitbook.yaml +0 -0
  22. {symbolicai-0.19.0 → symbolicai-0.20.1}/.github/FUNDING.yml +0 -0
  23. {symbolicai-0.19.0 → symbolicai-0.20.1}/.gitignore +0 -0
  24. {symbolicai-0.19.0 → symbolicai-0.20.1}/.symai/symsh.config.json +0 -0
  25. {symbolicai-0.19.0 → symbolicai-0.20.1}/CITATION.cff +0 -0
  26. {symbolicai-0.19.0 → symbolicai-0.20.1}/Dockerfile +0 -0
  27. {symbolicai-0.19.0 → symbolicai-0.20.1}/MANIFEST.in +0 -0
  28. {symbolicai-0.19.0 → symbolicai-0.20.1}/app.py +0 -0
  29. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/banner.png +0 -0
  30. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/cat.jpg +0 -0
  31. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/cat.png +0 -0
  32. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/contract_flow.png +0 -0
  33. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/img1.png +0 -0
  34. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/img10.png +0 -0
  35. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/img2.png +0 -0
  36. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/img3.png +0 -0
  37. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/img4.png +0 -0
  38. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/img5.png +0 -0
  39. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/img6.png +0 -0
  40. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/img7.png +0 -0
  41. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/img8.png +0 -0
  42. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/img9.png +0 -0
  43. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/preview.gif +0 -0
  44. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/screen1.jpeg +0 -0
  45. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/symai_logo.png +0 -0
  46. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/symsh.png +0 -0
  47. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/vid1.png +0 -0
  48. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/vid2.png +0 -0
  49. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/vid3.png +0 -0
  50. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/vid4.png +0 -0
  51. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/vid5.png +0 -0
  52. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/images/vid6.png +0 -0
  53. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/results/news.html +0 -0
  54. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/results/news.png +0 -0
  55. {symbolicai-0.19.0 → symbolicai-0.20.1}/assets/results/news_prev.png +0 -0
  56. {symbolicai-0.19.0 → symbolicai-0.20.1}/bin/install.ps1 +0 -0
  57. {symbolicai-0.19.0 → symbolicai-0.20.1}/bin/install.sh +0 -0
  58. {symbolicai-0.19.0 → symbolicai-0.20.1}/build.py +0 -0
  59. {symbolicai-0.19.0 → symbolicai-0.20.1}/docker-compose.yml +0 -0
  60. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/clip_engine.md +0 -0
  61. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/custom_engine.md +0 -0
  62. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/drawing_engine.md +0 -0
  63. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/file_engine.md +0 -0
  64. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/indexing_engine.md +0 -0
  65. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/local_engine.md +0 -0
  66. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/neurosymbolic_engine.md +0 -0
  67. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/ocr_engine.md +0 -0
  68. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/speech_to_text_engine.md +0 -0
  69. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/symbolic_engine.md +0 -0
  70. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/ENGINES/webscraping_engine.md +0 -0
  71. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/FEATURES/contracts.md +0 -0
  72. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/FEATURES/error_handling.md +0 -0
  73. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/FEATURES/expressions.md +0 -0
  74. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/FEATURES/import.md +0 -0
  75. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/FEATURES/operations.md +0 -0
  76. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/FEATURES/primitives.md +0 -0
  77. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/INSTALLATION.md +0 -0
  78. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/INTRODUCTION.md +0 -0
  79. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/QUICKSTART.md +0 -0
  80. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/TOOLS/chatbot.md +0 -0
  81. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/TOOLS/packages.md +0 -0
  82. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/TOOLS/shell.md +0 -0
  83. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/TUTORIALS/chatbot.md +0 -0
  84. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/TUTORIALS/context.md +0 -0
  85. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/TUTORIALS/data_query.md +0 -0
  86. {symbolicai-0.19.0 → symbolicai-0.20.1}/docs/source/TUTORIALS/video_tutorials.md +0 -0
  87. {symbolicai-0.19.0 → symbolicai-0.20.1}/environment.yml +0 -0
  88. {symbolicai-0.19.0 → symbolicai-0.20.1}/examples/contracts.ipynb +0 -0
  89. {symbolicai-0.19.0 → symbolicai-0.20.1}/examples/primitives.ipynb +0 -0
  90. {symbolicai-0.19.0 → symbolicai-0.20.1}/icon_converter.py +0 -0
  91. {symbolicai-0.19.0 → symbolicai-0.20.1}/installer.py +0 -0
  92. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/Basics.ipynb +0 -0
  93. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/ChatBot.ipynb +0 -0
  94. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/Conversation.ipynb +0 -0
  95. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/Indexer.ipynb +0 -0
  96. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/News.ipynb +0 -0
  97. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/Queries.ipynb +0 -0
  98. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/TTS_Persona.ipynb +0 -0
  99. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/Lean engine.png +0 -0
  100. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/a_star.txt +0 -0
  101. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/abstract.py +0 -0
  102. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/audio.mp3 +0 -0
  103. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/dbpedia_samples.jsonl +0 -0
  104. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/dbpedia_samples_prepared_train.jsonl +0 -0
  105. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/dbpedia_samples_prepared_valid.jsonl +0 -0
  106. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/demo.py +0 -0
  107. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/demo_strategy.py +0 -0
  108. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/docs.py +0 -0
  109. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/einsteins_puzzle.txt +0 -0
  110. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/file.json +0 -0
  111. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/lean.py +0 -0
  112. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/news.py +0 -0
  113. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/paper.pdf +0 -0
  114. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/paper.py +0 -0
  115. {symbolicai-0.19.0 → symbolicai-0.20.1}/legacy/notebooks/examples/sql.py +0 -0
  116. {symbolicai-0.19.0 → symbolicai-0.20.1}/public/eai.svg +0 -0
  117. {symbolicai-0.19.0 → symbolicai-0.20.1}/pytest.ini +0 -0
  118. {symbolicai-0.19.0 → symbolicai-0.20.1}/setup.cfg +0 -0
  119. {symbolicai-0.19.0 → symbolicai-0.20.1}/setup.py +0 -0
  120. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/TERMS_OF_SERVICE.md +0 -0
  121. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/__init__.py +0 -0
  122. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/base.py +0 -0
  123. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/driver/webclient.py +0 -0
  124. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/__init__.py +0 -0
  125. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/drawing/engine_bfl.py +0 -0
  126. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/drawing/engine_gpt_image.py +0 -0
  127. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/embedding/engine_llama_cpp.py +0 -0
  128. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/embedding/engine_openai.py +0 -0
  129. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/embedding/engine_plugin_embeddings.py +0 -0
  130. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/execute/engine_python.py +0 -0
  131. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/files/engine_io.py +0 -0
  132. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/imagecaptioning/engine_blip2.py +0 -0
  133. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +0 -0
  134. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/index/engine_pinecone.py +0 -0
  135. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/index/engine_vectordb.py +0 -0
  136. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/lean/engine_lean4.py +0 -0
  137. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/neurosymbolic/__init__.py +0 -0
  138. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +0 -0
  139. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +0 -0
  140. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/neurosymbolic/engine_groq.py +0 -0
  141. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/neurosymbolic/engine_huggingface.py +0 -0
  142. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/neurosymbolic/engine_llama_cpp.py +0 -0
  143. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +0 -0
  144. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +0 -0
  145. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/ocr/engine_apilayer.py +0 -0
  146. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/output/engine_stdout.py +0 -0
  147. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/search/engine_perplexity.py +0 -0
  148. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/search/engine_serpapi.py +0 -0
  149. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/speech_to_text/engine_local_whisper.py +0 -0
  150. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/symbolic/engine_wolframalpha.py +0 -0
  151. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/text_to_speech/engine_openai.py +0 -0
  152. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/text_vision/engine_clip.py +0 -0
  153. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/engines/userinput/engine_console.py +0 -0
  154. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/mixin/__init__.py +0 -0
  155. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/mixin/deepseek.py +0 -0
  156. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/mixin/google.py +0 -0
  157. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/mixin/groq.py +0 -0
  158. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/mixin/openai.py +0 -0
  159. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/backend/settings.py +0 -0
  160. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/chat.py +0 -0
  161. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/collect/__init__.py +0 -0
  162. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/collect/dynamic.py +0 -0
  163. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/collect/pipeline.py +0 -0
  164. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/collect/stats.py +0 -0
  165. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/components.py +0 -0
  166. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/constraints.py +0 -0
  167. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/context.py +0 -0
  168. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/core.py +0 -0
  169. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/core_ext.py +0 -0
  170. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/endpoints/__init__py +0 -0
  171. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/endpoints/api.py +0 -0
  172. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/exceptions.py +0 -0
  173. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/__init__.py +0 -0
  174. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/api_builder.py +0 -0
  175. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/arxiv_pdf_parser.py +0 -0
  176. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/bibtex_parser.py +0 -0
  177. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/conversation.py +0 -0
  178. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/document.py +0 -0
  179. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/file_merger.py +0 -0
  180. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/graph.py +0 -0
  181. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/html_style_template.py +0 -0
  182. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/__init__.py +0 -0
  183. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/blip_2.py +0 -0
  184. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/clip.py +0 -0
  185. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/console.py +0 -0
  186. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/dall_e.py +0 -0
  187. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/file.py +0 -0
  188. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/flux.py +0 -0
  189. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/gpt_image.py +0 -0
  190. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/input.py +0 -0
  191. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/llava.py +0 -0
  192. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/naive_vectordb.py +0 -0
  193. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/ocr.py +0 -0
  194. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/openai_search.py +0 -0
  195. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/perplexity.py +0 -0
  196. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/pinecone.py +0 -0
  197. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/python.py +0 -0
  198. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/serpapi.py +0 -0
  199. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/terminal.py +0 -0
  200. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/tts.py +0 -0
  201. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/whisper.py +0 -0
  202. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/interfaces/wolframalpha.py +0 -0
  203. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/metrics/__init__.py +0 -0
  204. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/metrics/similarity.py +0 -0
  205. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/os_command.py +0 -0
  206. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/packages/__init__.py +0 -0
  207. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/packages/symdev.py +0 -0
  208. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/packages/sympkg.py +0 -0
  209. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/packages/symrun.py +0 -0
  210. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/personas/__init__.py +0 -0
  211. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/personas/builder.py +0 -0
  212. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/personas/dialogue.py +0 -0
  213. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/personas/persona.py +0 -0
  214. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/personas/research/__init__.py +0 -0
  215. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/personas/research/yann_lecun.py +0 -0
  216. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/personas/sales/__init__.py +0 -0
  217. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/personas/sales/erik_james.py +0 -0
  218. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/personas/student/__init__.py +0 -0
  219. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/personas/student/max_tenner.py +0 -0
  220. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/repo_cloner.py +0 -0
  221. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/seo_query_optimizer.py +0 -0
  222. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/solver.py +0 -0
  223. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/strategies/__init__.py +0 -0
  224. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/strategies/cot.py +0 -0
  225. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/summarizer.py +0 -0
  226. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/taypan_interpreter.py +0 -0
  227. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/extended/vectordb.py +0 -0
  228. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/formatter/__init__.py +0 -0
  229. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/formatter/emoji.pytxt +0 -0
  230. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/formatter/formatter.py +0 -0
  231. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/formatter/regex.py +0 -0
  232. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/functional.py +0 -0
  233. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/imports.py +0 -0
  234. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/interfaces.py +0 -0
  235. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/memory.py +0 -0
  236. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/menu/__init__.py +0 -0
  237. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/misc/__init__.py +0 -0
  238. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/misc/console.py +0 -0
  239. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/misc/loader.py +0 -0
  240. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/models/__init__.py +0 -0
  241. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/models/base.py +0 -0
  242. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/models/errors.py +0 -0
  243. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/ops/__init__.py +0 -0
  244. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/ops/measures.py +0 -0
  245. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/ops/primitives.py +0 -0
  246. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/post_processors.py +0 -0
  247. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/pre_processors.py +0 -0
  248. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/processor.py +0 -0
  249. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/prompts.py +0 -0
  250. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/server/__init__.py +0 -0
  251. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/server/huggingface_server.py +0 -0
  252. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/server/llama_cpp_server.py +0 -0
  253. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/shell.py +0 -0
  254. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/shellsv.py +0 -0
  255. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/strategy.py +0 -0
  256. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/symbol.py +0 -0
  257. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/symsh.md +0 -0
  258. {symbolicai-0.19.0 → symbolicai-0.20.1}/symai/utils.py +0 -0
  259. {symbolicai-0.19.0 → symbolicai-0.20.1}/symbolicai.egg-info/dependency_links.txt +0 -0
  260. {symbolicai-0.19.0 → symbolicai-0.20.1}/symbolicai.egg-info/entry_points.txt +0 -0
  261. {symbolicai-0.19.0 → symbolicai-0.20.1}/symbolicai.egg-info/top_level.txt +0 -0
  262. {symbolicai-0.19.0 → symbolicai-0.20.1}/tests/README.md +0 -0
  263. {symbolicai-0.19.0 → symbolicai-0.20.1}/tests/data/audio.mp3 +0 -0
  264. {symbolicai-0.19.0 → symbolicai-0.20.1}/tests/data/pg1727.txt +0 -0
  265. {symbolicai-0.19.0 → symbolicai-0.20.1}/tests/engines/search/perplexity_engine.py +0 -0
  266. {symbolicai-0.19.0 → symbolicai-0.20.1}/trusted_repos.yml +0 -0
@@ -1,24 +1,20 @@
1
- License
2
- =======
3
-
4
1
  BSD 3-Clause License
5
2
 
6
- Copyright (c) 2024, ExtensityAI FlexCo.
7
- All rights reserved.
3
+ Copyright (c) 2025, ExtensityAI FlexCo
8
4
 
9
5
  Redistribution and use in source and binary forms, with or without
10
6
  modification, are permitted provided that the following conditions are met:
11
7
 
12
- * Redistributions of source code must retain the above copyright notice, this
13
- list of conditions and the following disclaimer.
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
14
10
 
15
- * Redistributions in binary form must reproduce the above copyright notice,
16
- this list of conditions and the following disclaimer in the documentation
17
- and/or other materials provided with the distribution.
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
18
14
 
19
- * Neither the name of the copyright holder nor the names of its
20
- contributors may be used to endorse or promote products derived from
21
- this software without specific prior written permission.
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
22
18
 
23
19
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24
20
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -1,8 +1,37 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: symbolicai
3
- Version: 0.19.0
3
+ Version: 0.20.1
4
4
  Summary: A Neurosymbolic Perspective on Large Language Models
5
5
  Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
6
+ License: BSD 3-Clause License
7
+
8
+ Copyright (c) 2025, ExtensityAI FlexCo
9
+
10
+ Redistribution and use in source and binary forms, with or without
11
+ modification, are permitted provided that the following conditions are met:
12
+
13
+ 1. Redistributions of source code must retain the above copyright notice, this
14
+ list of conditions and the following disclaimer.
15
+
16
+ 2. Redistributions in binary form must reproduce the above copyright notice,
17
+ this list of conditions and the following disclaimer in the documentation
18
+ and/or other materials provided with the distribution.
19
+
20
+ 3. Neither the name of the copyright holder nor the names of its
21
+ contributors may be used to endorse or promote products derived from
22
+ this software without specific prior written permission.
23
+
24
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
+
6
35
  Project-URL: Homepage, https://extensity.ai
7
36
  Project-URL: GitHub, https://github.com/ExtensityAI/symbolicai
8
37
  Keywords: probabilistic programming,machine learning
@@ -11,6 +40,7 @@ Classifier: License :: OSI Approved :: BSD License
11
40
  Classifier: Operating System :: OS Independent
12
41
  Requires-Python: >=3.10
13
42
  Description-Content-Type: text/markdown
43
+ License-File: LICENSE
14
44
  Requires-Dist: attrs>=23.2.0
15
45
  Requires-Dist: setuptools>=70.0.0
16
46
  Requires-Dist: toml>=0.10.2
@@ -53,6 +83,7 @@ Requires-Dist: pycryptodome>=3.20.0
53
83
  Requires-Dist: httpx>=0.27.2
54
84
  Requires-Dist: nest-asyncio>=1.6.0
55
85
  Requires-Dist: rich>=13.9.4
86
+ Requires-Dist: playwright>=1.55.0
56
87
  Provides-Extra: bitsandbytes
57
88
  Requires-Dist: bitsandbytes>=0.43.1; extra == "bitsandbytes"
58
89
  Provides-Extra: blip2
@@ -69,6 +100,7 @@ Provides-Extra: webscraping
69
100
  Requires-Dist: beautifulsoup4>=4.12.3; extra == "webscraping"
70
101
  Requires-Dist: trafilatura>=2.0.0; extra == "webscraping"
71
102
  Requires-Dist: pdfminer.six; extra == "webscraping"
103
+ Requires-Dist: playwright>=1.45.0; extra == "webscraping"
72
104
  Provides-Extra: llama-cpp
73
105
  Requires-Dist: llama-cpp-python[server]>=0.3.7; extra == "llama-cpp"
74
106
  Provides-Extra: wolframalpha
@@ -92,6 +124,7 @@ Requires-Dist: symbolicai[webscraping]; extra == "all"
92
124
  Requires-Dist: symbolicai[serpapi]; extra == "all"
93
125
  Requires-Dist: symbolicai[services]; extra == "all"
94
126
  Requires-Dist: symbolicai[solver]; extra == "all"
127
+ Dynamic: license-file
95
128
 
96
129
  # **SymbolicAI: A neuro-symbolic perspective on LLMs**
97
130
  <img src="https://raw.githubusercontent.com/ExtensityAI/symbolicai/refs/heads/main/assets/images/banner.png">
@@ -420,7 +453,7 @@ Now, there are tools like DeepWiki that provide better documentation than we cou
420
453
 
421
454
  ## 📝 License
422
455
 
423
- This project is licensed under the BSD-3-Clause License - refer to [the docs](https://symbolicai.readthedocs.io/en/latest/LICENSE.html).
456
+ This project is licensed under the BSD-3-Clause License.
424
457
 
425
458
  ## Like this Project?
426
459
 
@@ -325,7 +325,7 @@ Now, there are tools like DeepWiki that provide better documentation than we cou
325
325
 
326
326
  ## 📝 License
327
327
 
328
- This project is licensed under the BSD-3-Clause License - refer to [the docs](https://symbolicai.readthedocs.io/en/latest/LICENSE.html).
328
+ This project is licensed under the BSD-3-Clause License.
329
329
 
330
330
  ## Like this Project?
331
331
 
@@ -78,7 +78,7 @@ res = search("What local events are happening today?",
78
78
  })
79
79
 
80
80
  # Control the amount of search context
81
- res = search("Explain quantum computing developments", search_context_size="high")
81
+ res = search("Explain quantum computing developments")
82
82
  ```
83
83
 
84
84
  Here's how to configure the OpenAI search engine:
@@ -36,6 +36,3 @@
36
36
  * [Chatbot CLI](TOOLS/chatbot.md)
37
37
  * [Package Manager](TOOLS/packages.md)
38
38
  * [Shell](TOOLS/shell.md)
39
-
40
- ## License
41
- * [License](LICENSE)
@@ -64,14 +64,15 @@ dependencies = [
64
64
  "pycryptodome>=3.20.0",
65
65
  "httpx>=0.27.2",
66
66
  "nest-asyncio>=1.6.0",
67
- "rich>=13.9.4"
67
+ "rich>=13.9.4",
68
+ "playwright>=1.55.0",
68
69
  ]
69
70
 
70
71
  [project.optional-dependencies]
71
72
  bitsandbytes = ["bitsandbytes>=0.43.1"] # handle separately because of Apple Silicon
72
73
  blip2 = ["decord>=0.6.0", "salesforce-lavis>=1.0.0", "opencv-python-headless>=4.5.5.64"]
73
74
  hf = ["transformers>=4.45.2", "accelerate>=0.33.0", "peft>=0.13.1", "datasets>=3.0.1", "trl>=0.11.3"]
74
- webscraping = ["beautifulsoup4>=4.12.3", "trafilatura>=2.0.0", "pdfminer.six"]
75
+ webscraping = ["beautifulsoup4>=4.12.3", "trafilatura>=2.0.0", "pdfminer.six", "playwright>=1.45.0"]
75
76
  llama_cpp = ["llama-cpp-python[server]>=0.3.7"] # handle separately since this dependency may not compile and require special maintenance
76
77
  wolframalpha = ["wolframalpha>=5.0.0"]
77
78
  whisper = ["openai-whisper>=20240930", "numba>=0.60.0"]
@@ -33,7 +33,7 @@ os.environ['TOKENIZERS_PARALLELISM'] = "false"
33
33
  # Create singleton instance
34
34
  config_manager = settings.SymAIConfig()
35
35
 
36
- SYMAI_VERSION = "0.19.0"
36
+ SYMAI_VERSION = "0.20.1"
37
37
  __version__ = SYMAI_VERSION
38
38
  __root_dir__ = config_manager.config_dir
39
39
 
@@ -56,7 +56,8 @@ class ClaudeXChatEngine(Engine, AnthropicMixin):
56
56
  self.config.get('NEUROSYMBOLIC_ENGINE_MODEL').startswith('claude') and \
57
57
  ('3-7' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
58
58
  '4-0' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
59
- '4-1' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
59
+ '4-1' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
60
+ '4-5' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
60
61
  return 'neurosymbolic'
61
62
  return super().id() # default to unregistered
62
63
 
@@ -57,7 +57,8 @@ class ClaudeXReasoningEngine(Engine, AnthropicMixin):
57
57
  self.config.get('NEUROSYMBOLIC_ENGINE_MODEL').startswith('claude') and \
58
58
  ('3-7' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
59
59
  '4-0' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
60
- '4-1' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
60
+ '4-1' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
61
+ '4-5' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
61
62
  return 'neurosymbolic'
62
63
  return super().id() # default to unregistered
63
64
 
@@ -230,6 +230,8 @@ class SearchResult(Result):
230
230
 
231
231
 
232
232
  class GPTXSearchEngine(Engine):
233
+ MAX_ALLOWED_DOMAINS = 20
234
+
233
235
  def __init__(self, api_key: str | None = None, model: str | None = None):
234
236
  super().__init__()
235
237
  self.config = deepcopy(SYMAI_CONFIG)
@@ -250,6 +252,65 @@ class GPTXSearchEngine(Engine):
250
252
  return 'search'
251
253
  return super().id() # default to unregistered
252
254
 
255
+ def _extract_netloc(self, raw_domain: str | None) -> str | None:
256
+ if not isinstance(raw_domain, str):
257
+ return None
258
+ candidate = raw_domain.strip()
259
+ if not candidate:
260
+ return None
261
+ parsed = urlsplit(candidate if '://' in candidate else f"//{candidate}")
262
+ netloc = parsed.netloc or parsed.path
263
+ if not netloc:
264
+ return None
265
+ if '@' in netloc:
266
+ netloc = netloc.split('@', 1)[1]
267
+ if ':' in netloc:
268
+ netloc = netloc.split(':', 1)[0]
269
+ netloc = netloc.strip('.').strip()
270
+ if not netloc:
271
+ return None
272
+ return netloc.lower()
273
+
274
+ def _normalize_allowed_domains(self, domains: list[str] | None) -> list[str]:
275
+ if not domains or not isinstance(domains, list):
276
+ return []
277
+
278
+ normalized: list[str] = []
279
+ seen: set[str] = set()
280
+ for domain in domains:
281
+ netloc = self._extract_netloc(domain)
282
+ if not netloc or netloc in seen:
283
+ continue
284
+ # Validate that netloc is actually a valid domain
285
+ if not self._is_domain(netloc):
286
+ continue
287
+ normalized.append(netloc)
288
+ seen.add(netloc)
289
+ if len(normalized) >= self.MAX_ALLOWED_DOMAINS:
290
+ break
291
+ return normalized
292
+
293
+ def _is_domain(self, s: str) -> bool:
294
+ _label_re = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?$")
295
+ if not s:
296
+ return False
297
+ host = s.strip().rstrip(".")
298
+ # If the input might be a URL, extract the hostname via urllib:
299
+ if "://" in host or "/" in host or "@" in host:
300
+ host = urlsplit(host if "://" in host else f"//{host}").hostname or ""
301
+ if not host:
302
+ return False
303
+ try:
304
+ host_ascii = host.encode("idna").decode("ascii")
305
+ except Exception:
306
+ return False
307
+ if len(host_ascii) > 253:
308
+ return False
309
+ labels = host_ascii.split(".")
310
+ if len(labels) < 2: # require a dot (reject "google")
311
+ return False
312
+ return all(_label_re.fullmatch(lbl or "") for lbl in labels)
313
+
253
314
  def command(self, *args, **kwargs):
254
315
  super().command(*args, **kwargs)
255
316
  if 'SEARCH_ENGINE_API_KEY' in kwargs:
@@ -261,20 +322,23 @@ class GPTXSearchEngine(Engine):
261
322
  messages = argument.prop.prepared_input
262
323
  kwargs = argument.kwargs
263
324
 
264
- tool_definition = {"type": "web_search_preview"}
325
+ tool_definition = {"type": "web_search"}
265
326
  user_location = kwargs.get('user_location')
266
327
  if user_location:
267
328
  tool_definition['user_location'] = user_location
268
- search_context_size = kwargs.get('search_context_size')
269
- if search_context_size:
270
- tool_definition['search_context_size'] = search_context_size
329
+
330
+ allowed_domains = self._normalize_allowed_domains(kwargs.get('allowed_domains'))
331
+ if allowed_domains:
332
+ tool_definition['filters'] = {
333
+ 'allowed_domains': allowed_domains
334
+ }
271
335
 
272
336
  self.model = kwargs.get('model', self.model) # Important for MetadataTracker to work correctly
273
337
  payload = {
274
338
  "model": self.model,
275
339
  "input": messages,
276
340
  "tools": [tool_definition],
277
- "tool_choice": {"type": "web_search_preview"} if self.model not in OPENAI_REASONING_MODELS else "auto" # force the use of web search tool for non-reasoning models
341
+ "tool_choice": {"type": "web_search"} if self.model not in OPENAI_REASONING_MODELS else "auto" # force the use of web search tool for non-reasoning models
278
342
  }
279
343
 
280
344
  try:
@@ -0,0 +1,323 @@
1
+ """
2
+ WARNING: This module implements a naive web scraping engine meant for light
3
+ testing. It does not prevent IP bans, bot detection, or terms-of-service
4
+ violations. Use only where scraping is legally permitted and respect each
5
+ site's robots directives. For production workloads, add robust rate limiting,
6
+ consent handling, rotating proxies/VPNs, and ongoing monitoring to avoid
7
+ service disruption.
8
+ """
9
+
10
+ import io
11
+ import logging
12
+ import re
13
+ from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
14
+
15
+ import requests
16
+ import trafilatura
17
+ from bs4 import BeautifulSoup
18
+ from pdfminer.high_level import extract_text
19
+ from requests.structures import CaseInsensitiveDict
20
+
21
+ from ....symbol import Result
22
+ from ...base import Engine
23
+
24
+ logging.getLogger("pdfminer").setLevel(logging.WARNING)
25
+ logging.getLogger("trafilatura").setLevel(logging.WARNING)
26
+
27
+
28
class RequestsResult(Result):
    """Result that holds an HTTP response and the text extracted from it.

    The response passed as ``value`` is kept on ``raw``; ``_value`` is set to
    the extracted text, or None when extraction fails.
    """

    def __init__(self, value, output_format="markdown", **kwargs) -> None:
        super().__init__(value, **kwargs)
        self.output_format = output_format
        self.raw = value
        self._value = self.extract()

    def extract(self):
        """Extract text from ``self.raw`` and store it on ``self._value``.

        PDF responses (by Content-Type or a ``.pdf`` URL suffix) go through
        pdfminer's ``extract_text``; everything else goes through trafilatura
        using ``self.output_format``.

        Returns:
            The extracted text, or None if extraction raised.
        """
        response = self.raw
        content_type = response.headers.get("Content-Type", "").lower()
        treat_as_pdf = "application/pdf" in content_type or response.url.lower().endswith(".pdf")
        try:
            if treat_as_pdf:
                with io.BytesIO(response.content) as stream:
                    extracted = extract_text(stream)
            else:
                tree = trafilatura.load_html(response.content)
                extracted = trafilatura.extract(tree, output_format=self.output_format)
        except Exception:  # deliberately broad: extraction must never hard-fail
            extracted = None
        self._value = extracted
        return extracted
48
+
49
+
50
class RequestsEngine(Engine):
    """
    Lightweight HTTP/Playwright fetching pipeline for content extraction.

    Clarity is preferred over stealth. Helper methods normalize cookie
    metadata before it is passed to Playwright, so the headless browser and
    the requests session stay aligned.
    """

    # Canonical spelling of SameSite values, keyed by their lower-case form.
    _SAMESITE_CANONICAL = {value.lower(): value for value in ("Strict", "Lax", "None")}

    # Default request headers, including a desktop Chrome user agent.
    DEFAULT_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "DNT": "1",
    }

    # Cookies pre-seeded per host: some forums skip their consent or age gate
    # when such a cookie is already present.
    COMMON_BYPASS_COOKIES = dict(
        cookieconsent_status="allow",
        accepted_cookies="yes",
        age_verified="1",
    )
80
+
81
def __init__(self, timeout=15, verify_ssl=True, user_agent=None):
    """
    Create the engine and its shared ``requests`` session.

    Args:
        timeout: Seconds to wait for network operations before aborting.
        verify_ssl: Toggle for TLS certificate verification.
        user_agent: Optional override for the default desktop Chrome UA.
    """
    super().__init__()
    self.name = self.__class__.__name__
    self.timeout = timeout
    self.verify_ssl = verify_ssl

    # Start from the class defaults, then apply the caller's UA if given.
    session = requests.Session()
    session.headers.update(self.DEFAULT_HEADERS)
    if user_agent:
        session.headers["User-Agent"] = user_agent
    self.session = session
99
+
100
+ def _maybe_set_bypass_cookies(self, url: str):
101
+ netloc = urlparse(url).hostname
102
+ if not netloc:
103
+ return
104
+ for k, v in self.COMMON_BYPASS_COOKIES.items():
105
+ self.session.cookies.set(k, v, domain=netloc)
106
+
107
+ @staticmethod
108
+ def _normalize_http_only(raw_value, key_present):
109
+ """
110
+ Playwright expects a boolean. Cookie metadata can arrive as strings,
111
+ numbers, or placeholder objects, so normalize defensively.
112
+ """
113
+ if isinstance(raw_value, bool):
114
+ return raw_value
115
+ if isinstance(raw_value, str):
116
+ normalized = raw_value.strip().lower()
117
+ if normalized in {"false", "0", "no"}:
118
+ return False
119
+ if normalized in {"true", "1", "yes"}:
120
+ return True
121
+ if raw_value is None:
122
+ return key_present
123
+ return bool(raw_value)
124
+
125
+ @classmethod
126
+ def _normalize_same_site(cls, raw_value):
127
+ if raw_value is None:
128
+ return None
129
+ normalized = str(raw_value).strip().lower()
130
+ return cls._SAMESITE_CANONICAL.get(normalized)
131
+
132
+ def _playwright_cookie_payload(self, cookie, hostname):
133
+ """
134
+ Convert a requests cookie into Playwright-friendly format or return None
135
+ if the cookie does not apply to the hostname.
136
+ """
137
+ domain = (cookie.domain or hostname).lstrip(".")
138
+ if not hostname.endswith(domain):
139
+ return None
140
+
141
+ rest_attrs = {k.lower(): v for k, v in cookie._rest.items()}
142
+ http_only = self._normalize_http_only(rest_attrs.get("httponly"), "httponly" in rest_attrs)
143
+ payload = {
144
+ "name": cookie.name,
145
+ "value": cookie.value,
146
+ "domain": cookie.domain or hostname,
147
+ "path": cookie.path or "/",
148
+ "httpOnly": http_only,
149
+ "secure": cookie.secure,
150
+ }
151
+ if cookie.expires:
152
+ payload["expires"] = cookie.expires
153
+
154
+ same_site = self._normalize_same_site(rest_attrs.get("samesite"))
155
+ if same_site:
156
+ payload["sameSite"] = same_site
157
+ return payload
158
+
159
+ def _follow_meta_refresh(self, resp, timeout=15):
160
+ """
161
+ Some old forums use <meta http-equiv="refresh" content="0;url=...">
162
+ (sometimes to simulate a popup or interstitial). Follow it once.
163
+ """
164
+ ctype = resp.headers.get("Content-Type", "")
165
+ if "text/html" not in ctype.lower():
166
+ return resp
167
+ # Use apparent encoding to decode legacy charsets
168
+ soup = BeautifulSoup(resp.text, "html.parser")
169
+ resp.encoding = resp.encoding or resp.apparent_encoding
170
+ meta = soup.find("meta", attrs={"http-equiv": re.compile("^refresh$", re.I)})
171
+ if not meta or "content" not in meta.attrs:
172
+ return resp
173
+ m = re.search(r"url=(.+)", meta["content"], flags=re.I)
174
+ if not m:
175
+ return resp
176
+ refresh_url = m.group(1).strip().strip("'\"")
177
+ target = urljoin(resp.url, refresh_url)
178
+ # Avoid loops
179
+ if target == resp.url:
180
+ return resp
181
+ return self.session.get(target, timeout=timeout, allow_redirects=True)
182
+
183
def _fetch_with_playwright(self, url: str, wait_selector: str = None, wait_until: str = "networkidle", timeout: float = None):
    """
    Load ``url`` in headless Chromium and repackage the rendered page as a
    ``requests.Response`` so callers can treat it like the non-JS path.

    Args:
        url: Address to navigate to.
        wait_selector: Optional selector to wait for after navigation.
        wait_until: Value forwarded to ``page.goto`` as ``wait_until``.
        timeout: Seconds for navigation and selector waits; ``self.timeout``
            is used when None.

    Returns:
        A ``requests.Response`` carrying the page HTML (UTF-8 encoded), the
        final page URL, and the navigation response's status and headers
        (status 200 and an HTML content type when those are unavailable).

    Raises:
        RuntimeError: Playwright cannot be imported.
        requests.exceptions.Timeout: Playwright timed out and no page content
            could be read.
    """
    try:
        from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
        logging.getLogger("playwright").setLevel(logging.WARNING)
    except ImportError as exc:
        raise RuntimeError(
            "Playwright is not installed. Install symbolicai[webscraping] with Playwright extras to enable render_js."
        ) from exc

    effective_timeout = self.timeout if timeout is None else timeout
    timeout_ms = max(int(effective_timeout * 1000), 0)
    user_agent = self.session.headers.get("User-Agent")

    # Carry over the session cookies that apply to the target host.
    hostname = urlparse(url).hostname or ""
    cookie_payload = []
    if hostname:
        converted = (self._playwright_cookie_payload(jar_cookie, hostname) for jar_cookie in self.session.cookies)
        cookie_payload = [entry for entry in converted if entry]

    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        context = browser.new_context(
            user_agent=user_agent,
            java_script_enabled=True,
            ignore_https_errors=not self.verify_ssl,
        )
        if cookie_payload:
            context.add_cookies(cookie_payload)
        page = context.new_page()

        timeout_error = None
        response = None
        try:
            # A navigation timeout is remembered, not raised: the page may
            # still hold usable content.
            try:
                response = page.goto(url, wait_until=wait_until, timeout=timeout_ms)
                if wait_selector:
                    page.wait_for_selector(wait_selector, timeout=timeout_ms)
            except PlaywrightTimeoutError as exc:
                timeout_error = exc

            try:
                content = page.content()
            except Exception:
                content = ""

            # Always persist Playwright cookies back into the requests session.
            for browser_cookie in context.cookies():
                self.session.cookies.set(
                    browser_cookie["name"],
                    browser_cookie["value"],
                    domain=browser_cookie.get("domain"),
                    path=browser_cookie.get("path", "/"),
                )

            final_url = page.url
            if response is None:
                status = 200
                headers = CaseInsensitiveDict({})
            else:
                status = response.status
                headers = CaseInsensitiveDict(response.headers)
            if "content-type" not in headers:
                headers["Content-Type"] = "text/html; charset=utf-8"

            # Only give up when the timeout left nothing to work with.
            if timeout_error and not content:
                raise requests.exceptions.Timeout(f"Playwright timed out while rendering {url}") from timeout_error
        finally:
            context.close()
            browser.close()

    # Build a synthetic response mirroring what the requests path returns.
    rendered = requests.Response()
    rendered.status_code = status
    rendered._content = content.encode("utf-8", errors="replace")
    rendered.url = final_url
    rendered.headers = headers
    rendered.encoding = "utf-8"
    return rendered
269
+
270
def id(self) -> str:
    """Return the identifier under which this engine is registered."""
    engine_id = 'webscraping'
    return engine_id
272
+
273
def forward(self, argument):
    """
    Fetch the prepared URL and wrap the response in a ``RequestsResult``.

    Steps:
    - Pre-seed consent/age cookies for the target host.
    - Drop ``utm_source`` / ``utm_medium`` / ``utm_campaign`` query parameters.
    - Fetch through Playwright when the ``render_js`` kwarg is truthy,
      otherwise through the requests session.
    - Follow one legacy meta-refresh redirect, if present.

    Recognized kwargs: ``output_format``, ``render_js``,
    ``render_wait_selector``, ``render_wait_until``, ``render_timeout``.

    Returns:
        A tuple ``([RequestsResult], metadata)`` where ``metadata`` records
        the response source, whether JS rendering was used, and the final URL.
    """
    url = argument.prop.prepared_input
    options = argument.kwargs
    output_format = options.get("output_format", "markdown")

    self._maybe_set_bypass_cookies(url)

    # Rebuild the URL without the tracking parameters.
    tracking_params = {"utm_source", "utm_medium", "utm_campaign"}
    parts = urlparse(url)
    kept_pairs = []
    for key, value in parse_qsl(parts.query, keep_blank_values=True):
        if key.lower() not in tracking_params:
            kept_pairs.append((key, value))
    clean_url = urlunparse(parts._replace(query=urlencode(kept_pairs)))

    render_js = options.get("render_js")

    # Prefer fast requests path unless the caller opts into JS rendering.
    if not render_js:
        resp = self.session.get(clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl)
    else:
        resp = self._fetch_with_playwright(
            clean_url,
            wait_selector=options.get("render_wait_selector"),
            wait_until=options.get("render_wait_until", "networkidle"),
            timeout=options.get("render_timeout"),
        )
    resp.raise_for_status()

    # Follow a legacy meta refresh once (do AFTER normal HTTP redirects)
    followed = self._follow_meta_refresh(resp, timeout=self.timeout)
    if followed is not resp:
        followed.raise_for_status()
        resp = followed

    metadata = {
        "response_source": "playwright" if render_js else "requests",
        "render_js": bool(render_js),
        "final_url": resp.url,
    }
    return [RequestsResult(resp, output_format)], metadata
321
+
322
def prepare(self, argument):
    """Store the stringified ``argument.prop.url`` as the prepared input."""
    prop = argument.prop
    prop.prepared_input = str(prop.url)
@@ -14,6 +14,8 @@ SUPPORTED_REASONING_MODELS = [
14
14
  "claude-opus-4-0",
15
15
  "claude-sonnet-4-0",
16
16
  'claude-3-7-sonnet-latest',
17
+ 'claude-haiku-4-5',
18
+ 'claude-sonnet-4-5',
17
19
  ]
18
20
 
19
21
  class AnthropicMixin:
@@ -22,6 +24,8 @@ class AnthropicMixin:
22
24
  self.model == 'claude-opus-4-0' or \
23
25
  self.model == 'claude-sonnet-4-0' or \
24
26
  self.model == 'claude-3-7-sonnet-latest' or \
27
+ self.model == 'claude-haiku-4-5' or \
28
+ self.model == 'claude-sonnet-4-5' or \
25
29
  self.model == 'claude-3-5-sonnet-latest' or \
26
30
  self.model == 'claude-3-5-sonnet-20241022' or \
27
31
  self.model == 'claude-3-5-sonnet-20240620' or \
@@ -33,7 +37,9 @@ class AnthropicMixin:
33
37
 
34
38
  def api_max_response_tokens(self):
35
39
  if self.model == 'claude-sonnet-4-0' or \
36
- self.model == 'claude-3-7-sonnet-latest':
40
+ self.model == 'claude-3-7-sonnet-latest' or \
41
+ self.model == 'claude-haiku-4-5' or \
42
+ self.model == 'claude-sonnet-4-5':
37
43
  return 64_000
38
44
  if self.model == 'claude-opus-4-1' or \
39
45
  self.model == 'claude-opus-4-0':
@@ -10,6 +10,8 @@ class naive_webscraping(Expression):
10
10
 
11
11
  def __call__(self, url: str, **kwargs) -> RequestsResult:
12
12
  @core.scrape(url=url, **kwargs)
13
- def _func(_) -> RequestsResult:
14
- pass
13
+ def _func(_, *args, **inner_kwargs) -> RequestsResult:
14
+ # The fallback path may inject debugging kwargs like `error`/`stack_trace`;
15
+ # accept and ignore them so EngineRepository can surface structured failures.
16
+ return None
15
17
  return _func(self)