symbolicai 0.19.0__tar.gz → 0.20.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {symbolicai-0.19.0/docs/source → symbolicai-0.20.0}/LICENSE +9 -13
- {symbolicai-0.19.0 → symbolicai-0.20.0}/PKG-INFO +35 -2
- {symbolicai-0.19.0 → symbolicai-0.20.0}/README.md +1 -1
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/search_engine.md +1 -1
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/SUMMARY.md +0 -3
- {symbolicai-0.19.0 → symbolicai-0.20.0}/pyproject.toml +3 -2
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/__init__.py +1 -1
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +2 -1
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +2 -1
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/search/engine_openai.py +2 -5
- symbolicai-0.20.0/symai/backend/engines/webscraping/engine_requests.py +323 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/mixin/anthropic.py +7 -1
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/naive_webscraping.py +4 -2
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/menu/screen.py +9 -6
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symbolicai.egg-info/PKG-INFO +35 -2
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symbolicai.egg-info/SOURCES.txt +1 -1
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symbolicai.egg-info/requires.txt +2 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/tests/engines/search/openai_engine.py +1 -1
- {symbolicai-0.19.0 → symbolicai-0.20.0}/uv.lock +90 -3
- symbolicai-0.19.0/symai/backend/engines/webscraping/engine_requests.py +0 -128
- {symbolicai-0.19.0 → symbolicai-0.20.0}/.gitbook.yaml +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/.github/FUNDING.yml +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/.gitignore +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/.symai/symsh.config.json +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/CITATION.cff +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/Dockerfile +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/MANIFEST.in +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/app.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/banner.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/cat.jpg +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/cat.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/contract_flow.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/img1.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/img10.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/img2.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/img3.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/img4.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/img5.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/img6.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/img7.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/img8.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/img9.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/preview.gif +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/screen1.jpeg +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/symai_logo.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/symsh.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/vid1.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/vid2.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/vid3.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/vid4.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/vid5.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/images/vid6.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/results/news.html +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/results/news.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/assets/results/news_prev.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/bin/install.ps1 +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/bin/install.sh +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/build.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docker-compose.yml +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/clip_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/custom_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/drawing_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/file_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/indexing_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/local_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/neurosymbolic_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/ocr_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/speech_to_text_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/symbolic_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/ENGINES/webscraping_engine.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/FEATURES/contracts.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/FEATURES/error_handling.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/FEATURES/expressions.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/FEATURES/import.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/FEATURES/operations.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/FEATURES/primitives.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/INSTALLATION.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/INTRODUCTION.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/QUICKSTART.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/TOOLS/chatbot.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/TOOLS/packages.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/TOOLS/shell.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/TUTORIALS/chatbot.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/TUTORIALS/context.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/TUTORIALS/data_query.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/docs/source/TUTORIALS/video_tutorials.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/environment.yml +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/examples/contracts.ipynb +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/examples/primitives.ipynb +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/icon_converter.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/installer.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/Basics.ipynb +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/ChatBot.ipynb +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/Conversation.ipynb +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/Indexer.ipynb +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/News.ipynb +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/Queries.ipynb +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/TTS_Persona.ipynb +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/Lean engine.png +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/a_star.txt +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/abstract.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/audio.mp3 +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/dbpedia_samples.jsonl +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/dbpedia_samples_prepared_train.jsonl +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/dbpedia_samples_prepared_valid.jsonl +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/demo.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/demo_strategy.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/docs.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/einsteins_puzzle.txt +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/file.json +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/lean.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/news.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/paper.pdf +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/paper.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/legacy/notebooks/examples/sql.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/public/eai.svg +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/pytest.ini +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/setup.cfg +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/setup.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/TERMS_OF_SERVICE.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/base.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/driver/webclient.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/drawing/engine_bfl.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/drawing/engine_gpt_image.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/embedding/engine_llama_cpp.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/embedding/engine_openai.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/embedding/engine_plugin_embeddings.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/execute/engine_python.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/files/engine_io.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/imagecaptioning/engine_blip2.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/index/engine_pinecone.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/index/engine_vectordb.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/lean/engine_lean4.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/neurosymbolic/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/neurosymbolic/engine_groq.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/neurosymbolic/engine_huggingface.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/neurosymbolic/engine_llama_cpp.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/ocr/engine_apilayer.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/output/engine_stdout.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/search/engine_perplexity.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/search/engine_serpapi.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/speech_to_text/engine_local_whisper.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/symbolic/engine_wolframalpha.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/text_to_speech/engine_openai.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/text_vision/engine_clip.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/engines/userinput/engine_console.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/mixin/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/mixin/deepseek.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/mixin/google.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/mixin/groq.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/mixin/openai.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/backend/settings.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/chat.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/collect/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/collect/dynamic.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/collect/pipeline.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/collect/stats.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/components.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/constraints.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/context.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/core.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/core_ext.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/endpoints/__init__py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/endpoints/api.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/exceptions.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/api_builder.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/arxiv_pdf_parser.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/bibtex_parser.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/conversation.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/document.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/file_merger.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/graph.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/html_style_template.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/blip_2.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/clip.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/console.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/dall_e.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/file.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/flux.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/gpt_image.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/input.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/llava.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/naive_vectordb.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/ocr.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/openai_search.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/perplexity.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/pinecone.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/python.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/serpapi.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/terminal.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/tts.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/whisper.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/interfaces/wolframalpha.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/metrics/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/metrics/similarity.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/os_command.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/packages/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/packages/symdev.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/packages/sympkg.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/packages/symrun.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/personas/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/personas/builder.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/personas/dialogue.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/personas/persona.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/personas/research/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/personas/research/yann_lecun.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/personas/sales/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/personas/sales/erik_james.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/personas/student/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/personas/student/max_tenner.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/repo_cloner.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/seo_query_optimizer.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/solver.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/strategies/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/strategies/cot.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/summarizer.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/taypan_interpreter.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/extended/vectordb.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/formatter/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/formatter/emoji.pytxt +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/formatter/formatter.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/formatter/regex.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/functional.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/imports.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/interfaces.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/memory.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/menu/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/misc/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/misc/console.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/misc/loader.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/models/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/models/base.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/models/errors.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/ops/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/ops/measures.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/ops/primitives.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/post_processors.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/pre_processors.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/processor.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/prompts.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/server/__init__.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/server/huggingface_server.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/server/llama_cpp_server.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/shell.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/shellsv.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/strategy.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/symbol.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/symsh.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symai/utils.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symbolicai.egg-info/dependency_links.txt +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symbolicai.egg-info/entry_points.txt +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/symbolicai.egg-info/top_level.txt +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/tests/README.md +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/tests/data/audio.mp3 +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/tests/data/pg1727.txt +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/tests/engines/search/perplexity_engine.py +0 -0
- {symbolicai-0.19.0 → symbolicai-0.20.0}/trusted_repos.yml +0 -0
|
@@ -1,24 +1,20 @@
|
|
|
1
|
-
License
|
|
2
|
-
=======
|
|
3
|
-
|
|
4
1
|
BSD 3-Clause License
|
|
5
2
|
|
|
6
|
-
Copyright (c)
|
|
7
|
-
All rights reserved.
|
|
3
|
+
Copyright (c) 2025, ExtensityAI FlexCo
|
|
8
4
|
|
|
9
5
|
Redistribution and use in source and binary forms, with or without
|
|
10
6
|
modification, are permitted provided that the following conditions are met:
|
|
11
7
|
|
|
12
|
-
|
|
13
|
-
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
14
10
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
18
14
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
22
18
|
|
|
23
19
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
24
20
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
@@ -1,8 +1,37 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: symbolicai
|
|
3
|
-
Version: 0.19.0
|
|
3
|
+
Version: 0.20.0
|
|
4
4
|
Summary: A Neurosymbolic Perspective on Large Language Models
|
|
5
5
|
Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
|
|
6
|
+
License: BSD 3-Clause License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025, ExtensityAI FlexCo
|
|
9
|
+
|
|
10
|
+
Redistribution and use in source and binary forms, with or without
|
|
11
|
+
modification, are permitted provided that the following conditions are met:
|
|
12
|
+
|
|
13
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
14
|
+
list of conditions and the following disclaimer.
|
|
15
|
+
|
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
17
|
+
this list of conditions and the following disclaimer in the documentation
|
|
18
|
+
and/or other materials provided with the distribution.
|
|
19
|
+
|
|
20
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
21
|
+
contributors may be used to endorse or promote products derived from
|
|
22
|
+
this software without specific prior written permission.
|
|
23
|
+
|
|
24
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
25
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
26
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
27
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
28
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
29
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
30
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
31
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
32
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
33
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
34
|
+
|
|
6
35
|
Project-URL: Homepage, https://extensity.ai
|
|
7
36
|
Project-URL: GitHub, https://github.com/ExtensityAI/symbolicai
|
|
8
37
|
Keywords: probabilistic programming,machine learning
|
|
@@ -11,6 +40,7 @@ Classifier: License :: OSI Approved :: BSD License
|
|
|
11
40
|
Classifier: Operating System :: OS Independent
|
|
12
41
|
Requires-Python: >=3.10
|
|
13
42
|
Description-Content-Type: text/markdown
|
|
43
|
+
License-File: LICENSE
|
|
14
44
|
Requires-Dist: attrs>=23.2.0
|
|
15
45
|
Requires-Dist: setuptools>=70.0.0
|
|
16
46
|
Requires-Dist: toml>=0.10.2
|
|
@@ -53,6 +83,7 @@ Requires-Dist: pycryptodome>=3.20.0
|
|
|
53
83
|
Requires-Dist: httpx>=0.27.2
|
|
54
84
|
Requires-Dist: nest-asyncio>=1.6.0
|
|
55
85
|
Requires-Dist: rich>=13.9.4
|
|
86
|
+
Requires-Dist: playwright>=1.55.0
|
|
56
87
|
Provides-Extra: bitsandbytes
|
|
57
88
|
Requires-Dist: bitsandbytes>=0.43.1; extra == "bitsandbytes"
|
|
58
89
|
Provides-Extra: blip2
|
|
@@ -69,6 +100,7 @@ Provides-Extra: webscraping
|
|
|
69
100
|
Requires-Dist: beautifulsoup4>=4.12.3; extra == "webscraping"
|
|
70
101
|
Requires-Dist: trafilatura>=2.0.0; extra == "webscraping"
|
|
71
102
|
Requires-Dist: pdfminer.six; extra == "webscraping"
|
|
103
|
+
Requires-Dist: playwright>=1.45.0; extra == "webscraping"
|
|
72
104
|
Provides-Extra: llama-cpp
|
|
73
105
|
Requires-Dist: llama-cpp-python[server]>=0.3.7; extra == "llama-cpp"
|
|
74
106
|
Provides-Extra: wolframalpha
|
|
@@ -92,6 +124,7 @@ Requires-Dist: symbolicai[webscraping]; extra == "all"
|
|
|
92
124
|
Requires-Dist: symbolicai[serpapi]; extra == "all"
|
|
93
125
|
Requires-Dist: symbolicai[services]; extra == "all"
|
|
94
126
|
Requires-Dist: symbolicai[solver]; extra == "all"
|
|
127
|
+
Dynamic: license-file
|
|
95
128
|
|
|
96
129
|
# **SymbolicAI: A neuro-symbolic perspective on LLMs**
|
|
97
130
|
<img src="https://raw.githubusercontent.com/ExtensityAI/symbolicai/refs/heads/main/assets/images/banner.png">
|
|
@@ -420,7 +453,7 @@ Now, there are tools like DeepWiki that provide better documentation than we cou
|
|
|
420
453
|
|
|
421
454
|
## 📝 License
|
|
422
455
|
|
|
423
|
-
This project is licensed under the BSD-3-Clause License
|
|
456
|
+
This project is licensed under the BSD-3-Clause License.
|
|
424
457
|
|
|
425
458
|
## Like this Project?
|
|
426
459
|
|
|
@@ -325,7 +325,7 @@ Now, there are tools like DeepWiki that provide better documentation than we cou
|
|
|
325
325
|
|
|
326
326
|
## 📝 License
|
|
327
327
|
|
|
328
|
-
This project is licensed under the BSD-3-Clause License
|
|
328
|
+
This project is licensed under the BSD-3-Clause License.
|
|
329
329
|
|
|
330
330
|
## Like this Project?
|
|
331
331
|
|
|
@@ -78,7 +78,7 @@ res = search("What local events are happening today?",
|
|
|
78
78
|
})
|
|
79
79
|
|
|
80
80
|
# Control the amount of search context
|
|
81
|
-
res = search("Explain quantum computing developments"
|
|
81
|
+
res = search("Explain quantum computing developments")
|
|
82
82
|
```
|
|
83
83
|
|
|
84
84
|
Here's how to configure the OpenAI search engine:
|
|
@@ -64,14 +64,15 @@ dependencies = [
|
|
|
64
64
|
"pycryptodome>=3.20.0",
|
|
65
65
|
"httpx>=0.27.2",
|
|
66
66
|
"nest-asyncio>=1.6.0",
|
|
67
|
-
"rich>=13.9.4"
|
|
67
|
+
"rich>=13.9.4",
|
|
68
|
+
"playwright>=1.55.0",
|
|
68
69
|
]
|
|
69
70
|
|
|
70
71
|
[project.optional-dependencies]
|
|
71
72
|
bitsandbytes = ["bitsandbytes>=0.43.1"] # handle separately because of Apple Silicon
|
|
72
73
|
blip2 = ["decord>=0.6.0", "salesforce-lavis>=1.0.0", "opencv-python-headless>=4.5.5.64"]
|
|
73
74
|
hf = ["transformers>=4.45.2", "accelerate>=0.33.0", "peft>=0.13.1", "datasets>=3.0.1", "trl>=0.11.3"]
|
|
74
|
-
webscraping = ["beautifulsoup4>=4.12.3", "trafilatura>=2.0.0", "pdfminer.six"]
|
|
75
|
+
webscraping = ["beautifulsoup4>=4.12.3", "trafilatura>=2.0.0", "pdfminer.six", "playwright>=1.45.0"]
|
|
75
76
|
llama_cpp = ["llama-cpp-python[server]>=0.3.7"] # handle separately since this dependency may not compile and require special maintenance
|
|
76
77
|
wolframalpha = ["wolframalpha>=5.0.0"]
|
|
77
78
|
whisper = ["openai-whisper>=20240930", "numba>=0.60.0"]
|
|
@@ -56,7 +56,8 @@ class ClaudeXChatEngine(Engine, AnthropicMixin):
|
|
|
56
56
|
self.config.get('NEUROSYMBOLIC_ENGINE_MODEL').startswith('claude') and \
|
|
57
57
|
('3-7' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
|
|
58
58
|
'4-0' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
|
|
59
|
-
'4-1' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
|
|
59
|
+
'4-1' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') and \
|
|
60
|
+
'4-5' not in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
|
|
60
61
|
return 'neurosymbolic'
|
|
61
62
|
return super().id() # default to unregistered
|
|
62
63
|
|
|
@@ -57,7 +57,8 @@ class ClaudeXReasoningEngine(Engine, AnthropicMixin):
|
|
|
57
57
|
self.config.get('NEUROSYMBOLIC_ENGINE_MODEL').startswith('claude') and \
|
|
58
58
|
('3-7' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
|
|
59
59
|
'4-0' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
|
|
60
|
-
'4-1' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
|
|
60
|
+
'4-1' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL') or \
|
|
61
|
+
'4-5' in self.config.get('NEUROSYMBOLIC_ENGINE_MODEL')):
|
|
61
62
|
return 'neurosymbolic'
|
|
62
63
|
return super().id() # default to unregistered
|
|
63
64
|
|
|
@@ -261,20 +261,17 @@ class GPTXSearchEngine(Engine):
|
|
|
261
261
|
messages = argument.prop.prepared_input
|
|
262
262
|
kwargs = argument.kwargs
|
|
263
263
|
|
|
264
|
-
tool_definition = {"type": "
|
|
264
|
+
tool_definition = {"type": "web_search"}
|
|
265
265
|
user_location = kwargs.get('user_location')
|
|
266
266
|
if user_location:
|
|
267
267
|
tool_definition['user_location'] = user_location
|
|
268
|
-
search_context_size = kwargs.get('search_context_size')
|
|
269
|
-
if search_context_size:
|
|
270
|
-
tool_definition['search_context_size'] = search_context_size
|
|
271
268
|
|
|
272
269
|
self.model = kwargs.get('model', self.model) # Important for MetadataTracker to work correctly
|
|
273
270
|
payload = {
|
|
274
271
|
"model": self.model,
|
|
275
272
|
"input": messages,
|
|
276
273
|
"tools": [tool_definition],
|
|
277
|
-
"tool_choice": {"type": "
|
|
274
|
+
"tool_choice": {"type": "web_search"} if self.model not in OPENAI_REASONING_MODELS else "auto" # force the use of web search tool for non-reasoning models
|
|
278
275
|
}
|
|
279
276
|
|
|
280
277
|
try:
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
"""
|
|
2
|
+
WARNING: This module implements a naive web scraping engine meant for light
|
|
3
|
+
testing. It does not prevent IP bans, bot detection, or terms-of-service
|
|
4
|
+
violations. Use only where scraping is legally permitted and respect each
|
|
5
|
+
site's robots directives. For production workloads, add robust rate limiting,
|
|
6
|
+
consent handling, rotating proxies/VPNs, and ongoing monitoring to avoid
|
|
7
|
+
service disruption.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import io
|
|
11
|
+
import logging
|
|
12
|
+
import re
|
|
13
|
+
from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
|
|
14
|
+
|
|
15
|
+
import requests
|
|
16
|
+
import trafilatura
|
|
17
|
+
from bs4 import BeautifulSoup
|
|
18
|
+
from pdfminer.high_level import extract_text
|
|
19
|
+
from requests.structures import CaseInsensitiveDict
|
|
20
|
+
|
|
21
|
+
from ....symbol import Result
|
|
22
|
+
from ...base import Engine
|
|
23
|
+
|
|
24
|
+
logging.getLogger("pdfminer").setLevel(logging.WARNING)
|
|
25
|
+
logging.getLogger("trafilatura").setLevel(logging.WARNING)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class RequestsResult(Result):
    """
    Result wrapper around a fetched HTTP response.

    The untouched response object is kept on ``self.raw``; ``self._value``
    holds the text extracted from it (``None`` when extraction fails).
    """

    def __init__(self, value, output_format="markdown", **kwargs) -> None:
        """
        Args:
            value: Response-like object exposing ``headers``, ``url`` and ``content``.
            output_format: Format forwarded to ``trafilatura.extract`` for HTML bodies.
            **kwargs: Passed through to the ``Result`` base class.
        """
        super().__init__(value, **kwargs)
        self.output_format = output_format
        self.raw = value
        self._value = self.extract()

    def extract(self):
        """
        Extract readable text from the wrapped response.

        PDF bodies go through pdfminer, everything else through trafilatura.
        Extraction is best-effort: any failure yields ``None`` instead of
        raising, and the error is logged at debug level.

        Returns:
            The extracted text, or ``None`` if nothing could be extracted.
        """
        ctype = (self.raw.headers.get("Content-Type") or "").lower()
        url = (self.raw.url or "").lower()
        # Inspect the URL *path* as well, so that links such as
        # ``paper.pdf?download=1`` or ``paper.pdf#page=2`` count as PDFs.
        try:
            path = urlparse(url).path
        except ValueError:
            path = ""
        is_pdf = "application/pdf" in ctype or url.endswith(".pdf") or path.endswith(".pdf")
        try:
            if is_pdf:
                with io.BytesIO(self.raw.content) as fh:
                    self._value = extract_text(fh)
            else:
                decoded = trafilatura.load_html(self.raw.content)
                self._value = trafilatura.extract(decoded, output_format=self.output_format)
        except Exception:  # keep broad except to avoid hard failures
            logging.getLogger(__name__).debug(
                "Content extraction failed for %s", self.raw.url, exc_info=True
            )
            self._value = None
        return self._value
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class RequestsEngine(Engine):
    """
    Lightweight HTTP/Playwright fetching pipeline for content extraction.

    The engine favors clarity over stealth. Helper methods normalize cookie
    metadata before handing it to Playwright so that the headless browser and
    the requests session stay aligned.
    """

    # Cookies pre-seeded on the target host; some sites skip their consent or
    # age gate when one of these is already present.
    COMMON_BYPASS_COOKIES = {
        "cookieconsent_status": "allow",
        "accepted_cookies": "yes",
        "age_verified": "1",
    }

    DEFAULT_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "DNT": "1",
    }

    # Maps lower-cased SameSite values to the capitalized spelling emitted in
    # the Playwright cookie payload.
    _SAMESITE_CANONICAL = {
        "strict": "Strict",
        "lax": "Lax",
        "none": "None",
    }

    def __init__(self, timeout=15, verify_ssl=True, user_agent=None):
        """
        Args:
            timeout: Seconds to wait for network operations before aborting.
            verify_ssl: Toggle for TLS certificate verification.
            user_agent: Optional override for the default desktop Chrome UA.
        """
        super().__init__()
        self.timeout = timeout
        self.verify_ssl = verify_ssl
        self.name = self.__class__.__name__

        headers = dict(self.DEFAULT_HEADERS)
        if user_agent:
            headers["User-Agent"] = user_agent

        self.session = requests.Session()
        self.session.headers.update(headers)

    def _maybe_set_bypass_cookies(self, url: str):
        """Seed ``COMMON_BYPASS_COOKIES`` on the session for the URL's host, if it has one."""
        netloc = urlparse(url).hostname
        if not netloc:
            return
        for k, v in self.COMMON_BYPASS_COOKIES.items():
            self.session.cookies.set(k, v, domain=netloc)

    @staticmethod
    def _normalize_http_only(raw_value, key_present):
        """
        Playwright expects a boolean. Cookie metadata can arrive as strings,
        numbers, or placeholder objects, so normalize defensively.

        Args:
            raw_value: The stored ``HttpOnly`` attribute value (may be ``None``).
            key_present: Whether the attribute key existed at all; a bare
                ``HttpOnly`` flag is stored with a ``None`` value.
        """
        if isinstance(raw_value, bool):
            return raw_value
        if isinstance(raw_value, str):
            normalized = raw_value.strip().lower()
            if normalized in {"false", "0", "no"}:
                return False
            if normalized in {"true", "1", "yes"}:
                return True
        if raw_value is None:
            return key_present
        return bool(raw_value)

    @classmethod
    def _normalize_same_site(cls, raw_value):
        """Return the canonical SameSite spelling, or ``None`` if missing/unknown."""
        if raw_value is None:
            return None
        normalized = str(raw_value).strip().lower()
        return cls._SAMESITE_CANONICAL.get(normalized)

    def _playwright_cookie_payload(self, cookie, hostname):
        """
        Convert a requests cookie into Playwright-friendly format or return None
        if the cookie does not apply to the hostname.
        """
        domain = (cookie.domain or hostname).lstrip(".").lower()
        host = hostname.lower()
        # Match on a label boundary: a cookie for "example.com" applies to
        # "example.com" and "sub.example.com" but must not leak to
        # "notexample.com", which a bare suffix check would allow.
        if host != domain and not host.endswith("." + domain):
            return None

        # Non-standard attributes (HttpOnly, SameSite, ...) live in ``_rest``.
        rest_attrs = {k.lower(): v for k, v in (getattr(cookie, "_rest", None) or {}).items()}
        http_only = self._normalize_http_only(rest_attrs.get("httponly"), "httponly" in rest_attrs)
        payload = {
            "name": cookie.name,
            "value": cookie.value,
            "domain": cookie.domain or hostname,
            "path": cookie.path or "/",
            "httpOnly": http_only,
            "secure": cookie.secure,
        }
        if cookie.expires:
            payload["expires"] = cookie.expires

        same_site = self._normalize_same_site(rest_attrs.get("samesite"))
        if same_site:
            payload["sameSite"] = same_site
        return payload

    def _follow_meta_refresh(self, resp, timeout=15):
        """
        Some old forums use <meta http-equiv="refresh" content="0;url=...">
        (sometimes to simulate a popup or interstitial). Follow it once.

        Returns:
            The response of the refresh target, or ``resp`` itself when there
            is nothing to follow.
        """
        ctype = resp.headers.get("Content-Type", "")
        if "text/html" not in ctype.lower():
            return resp
        # Fall back to the apparent encoding for legacy charsets. This must
        # happen BEFORE ``resp.text`` is read, otherwise it has no effect on
        # the text handed to the parser.
        resp.encoding = resp.encoding or resp.apparent_encoding
        soup = BeautifulSoup(resp.text, "html.parser")
        meta = soup.find("meta", attrs={"http-equiv": re.compile("^refresh$", re.I)})
        if not meta or "content" not in meta.attrs:
            return resp
        m = re.search(r"url=(.+)", meta["content"], flags=re.I)
        if not m:
            return resp
        refresh_url = m.group(1).strip().strip("'\"")
        target = urljoin(resp.url, refresh_url)
        # Avoid loops
        if target == resp.url:
            return resp
        # Honor the engine's TLS setting on the follow-up hop as well, so it
        # behaves like the initial request issued from ``forward``.
        return self.session.get(target, timeout=timeout, allow_redirects=True, verify=self.verify_ssl)

    def _fetch_with_playwright(self, url: str, wait_selector: str = None, wait_until: str = "networkidle", timeout: float = None):
        """
        Render the target URL in a headless browser to execute JavaScript and
        return a synthetic ``requests.Response`` object to keep downstream
        processing consistent with the non-JS path.

        Args:
            url: Page to render.
            wait_selector: Optional selector to wait for after navigation.
            wait_until: Playwright navigation wait condition.
            timeout: Seconds to wait; defaults to the engine timeout.

        Raises:
            RuntimeError: If Playwright is not installed.
            requests.exceptions.Timeout: If rendering timed out and no page
                content could be captured.
        """
        try:
            from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
            logging.getLogger("playwright").setLevel(logging.WARNING)
        except ImportError as exc:
            raise RuntimeError(
                "Playwright is not installed. Install symbolicai[webscraping] with Playwright extras to enable render_js."
            ) from exc

        timeout_seconds = timeout if timeout is not None else self.timeout
        timeout_ms = max(int(timeout_seconds * 1000), 0)
        user_agent = self.session.headers.get("User-Agent")

        # Mirror the session cookies that apply to this host into the browser.
        parsed = urlparse(url)
        hostname = parsed.hostname or ""
        cookie_payload = []
        if hostname:
            for cookie in self.session.cookies:
                payload = self._playwright_cookie_payload(cookie, hostname)
                if payload:
                    cookie_payload.append(payload)

        content = ""
        final_url = url
        status = 200
        headers = CaseInsensitiveDict()

        with sync_playwright() as playwright:
            browser = playwright.chromium.launch(headless=True)
            context = browser.new_context(
                user_agent=user_agent,
                java_script_enabled=True,
                ignore_https_errors=not self.verify_ssl,
            )
            if cookie_payload:
                context.add_cookies(cookie_payload)
            page = context.new_page()

            navigation_error = None
            response = None
            try:
                try:
                    response = page.goto(url, wait_until=wait_until, timeout=timeout_ms)
                    if wait_selector:
                        page.wait_for_selector(wait_selector, timeout=timeout_ms)
                except PlaywrightTimeoutError as exc:
                    # Remember the timeout but still try to salvage whatever
                    # content the page managed to render.
                    navigation_error = exc

                try:
                    content = page.content()
                except Exception:
                    content = ""

                # Always persist Playwright cookies back into the requests session.
                for cookie in context.cookies():
                    self.session.cookies.set(
                        cookie["name"],
                        cookie["value"],
                        domain=cookie.get("domain"),
                        path=cookie.get("path", "/"),
                    )

                final_url = page.url
                status = response.status if response is not None else 200
                headers = CaseInsensitiveDict(response.headers if response is not None else {})
                if "content-type" not in headers:
                    headers["Content-Type"] = "text/html; charset=utf-8"

                # A timeout only counts as a failure when nothing was captured.
                if navigation_error and not content:
                    raise requests.exceptions.Timeout(f"Playwright timed out while rendering {url}") from navigation_error
            finally:
                context.close()
                browser.close()

        rendered_response = requests.Response()
        rendered_response.status_code = status
        rendered_response._content = content.encode("utf-8", errors="replace")
        rendered_response.url = final_url
        rendered_response.headers = headers
        rendered_response.encoding = "utf-8"
        return rendered_response

    def id(self) -> str:
        """Return the engine identifier string."""
        return 'webscraping'

    def forward(self, argument):
        """
        Fetch the prepared URL and return its extracted content.

        - Strips ``utm_source``/``utm_medium``/``utm_campaign`` query parameters.
        - Attempts to bypass simple consent/age popups by pre-seeding cookies.
        - Uses plain ``requests`` unless ``render_js`` is set, in which case
          the page is rendered with Playwright.
        - Handles legacy redirects via meta refresh (followed once).

        Returns:
            A tuple ``([RequestsResult], metadata)`` where ``metadata`` records
            the response source, the ``render_js`` flag and the final URL.

        Raises:
            requests.exceptions.HTTPError: If a response has an error status.
        """
        url = argument.prop.prepared_input
        kwargs = argument.kwargs
        output_format = kwargs.get("output_format", "markdown")

        self._maybe_set_bypass_cookies(url)

        parsed = urlparse(url)
        qs = [(k, v) for k, v in parse_qsl(parsed.query, keep_blank_values=True)
              if k.lower() not in {"utm_source", "utm_medium", "utm_campaign"}]
        clean_url = urlunparse(parsed._replace(query=urlencode(qs)))

        render_js = kwargs.get("render_js")
        render_wait_selector = kwargs.get("render_wait_selector")
        render_wait_until = kwargs.get("render_wait_until", "networkidle")
        render_timeout = kwargs.get("render_timeout")

        # Prefer fast requests path unless the caller opts into JS rendering.
        if render_js:
            resp = self._fetch_with_playwright(
                clean_url,
                wait_selector=render_wait_selector,
                wait_until=render_wait_until,
                timeout=render_timeout,
            )
        else:
            resp = self.session.get(clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl)
        resp.raise_for_status()

        # Follow a legacy meta refresh once (do AFTER normal HTTP redirects)
        resp2 = self._follow_meta_refresh(resp, timeout=self.timeout)
        if resp2 is not resp:
            resp2.raise_for_status()
            resp = resp2

        metadata = {
            "response_source": "playwright" if render_js else "requests",
            "render_js": bool(render_js),
            "final_url": resp.url,
        }
        result = RequestsResult(resp, output_format)
        return [result], metadata

    def prepare(self, argument):
        """Store the stringified ``argument.prop.url`` as the prepared input."""
        argument.prop.prepared_input = str(argument.prop.url)
|
|
@@ -14,6 +14,8 @@ SUPPORTED_REASONING_MODELS = [
|
|
|
14
14
|
"claude-opus-4-0",
|
|
15
15
|
"claude-sonnet-4-0",
|
|
16
16
|
'claude-3-7-sonnet-latest',
|
|
17
|
+
'claude-haiku-4-5',
|
|
18
|
+
'claude-sonnet-4-5',
|
|
17
19
|
]
|
|
18
20
|
|
|
19
21
|
class AnthropicMixin:
|
|
@@ -22,6 +24,8 @@ class AnthropicMixin:
|
|
|
22
24
|
self.model == 'claude-opus-4-0' or \
|
|
23
25
|
self.model == 'claude-sonnet-4-0' or \
|
|
24
26
|
self.model == 'claude-3-7-sonnet-latest' or \
|
|
27
|
+
self.model == 'claude-haiku-4-5' or \
|
|
28
|
+
self.model == 'claude-sonnet-4-5' or \
|
|
25
29
|
self.model == 'claude-3-5-sonnet-latest' or \
|
|
26
30
|
self.model == 'claude-3-5-sonnet-20241022' or \
|
|
27
31
|
self.model == 'claude-3-5-sonnet-20240620' or \
|
|
@@ -33,7 +37,9 @@ class AnthropicMixin:
|
|
|
33
37
|
|
|
34
38
|
def api_max_response_tokens(self):
|
|
35
39
|
if self.model == 'claude-sonnet-4-0' or \
|
|
36
|
-
self.model == 'claude-3-7-sonnet-latest'
|
|
40
|
+
self.model == 'claude-3-7-sonnet-latest' or \
|
|
41
|
+
self.model == 'claude-haiku-4-5' or \
|
|
42
|
+
self.model == 'claude-sonnet-4-5':
|
|
37
43
|
return 64_000
|
|
38
44
|
if self.model == 'claude-opus-4-1' or \
|
|
39
45
|
self.model == 'claude-opus-4-0':
|
|
@@ -10,6 +10,8 @@ class naive_webscraping(Expression):
|
|
|
10
10
|
|
|
11
11
|
def __call__(self, url: str, **kwargs) -> RequestsResult:
    """Scrape ``url`` through the ``core.scrape`` decorator and return its result."""

    @core.scrape(url=url, **kwargs)
    def _func(_instance, *_extra_args, **_extra_kwargs) -> RequestsResult:
        # Placeholder body. The fallback path may inject debugging kwargs such
        # as `error`/`stack_trace`; they are accepted and ignored so that
        # EngineRepository can surface structured failures.
        return None

    scraped = _func(self)
    return scraped
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
1
3
|
from prompt_toolkit import print_formatted_text
|
|
2
4
|
|
|
3
5
|
from ..misc.console import ConsoleStyle
|
|
@@ -33,12 +35,13 @@ def show_separator(print: callable = print_formatted_text):
|
|
|
33
35
|
|
|
34
36
|
|
|
35
37
|
def show_intro_menu():
    """
    Print the splash screen, info message and separator.

    Nothing is printed unless the ``SYMAI_WARNINGS`` environment variable is
    unset or equal to ``'1'``.
    """
    warnings_enabled = os.environ.get('SYMAI_WARNINGS', '1') == '1'
    if not warnings_enabled:
        return

    # (console style, renderer) pairs, shown in this order.
    sections = (
        ('extensity', show_splash_screen),
        ('text', show_info_message),
        ('extensity', show_separator),
    )
    for style_name, render in sections:
        with ConsoleStyle(style_name) as console:
            render(print=console.print)
|
|
42
45
|
|
|
43
46
|
if __name__ == '__main__':
|
|
44
47
|
show_intro_menu()
|