symbolicai 1.4.0__tar.gz → 1.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {symbolicai-1.4.0 → symbolicai-1.5.0}/PKG-INFO +2 -1
- symbolicai-1.5.0/docs/source/ENGINES/scrape_engine.md +143 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/search_engine.md +72 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/pyproject.toml +1 -1
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/__init__.py +1 -1
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/scrape/engine_requests.py +39 -10
- symbolicai-1.5.0/symai/backend/engines/search/__init__.py +13 -0
- symbolicai-1.5.0/symai/backend/engines/search/engine_firecrawl.py +333 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_parallel.py +5 -5
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/components.py +9 -3
- symbolicai-1.5.0/symai/extended/interfaces/firecrawl.py +30 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/parallel.py +5 -5
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/functional.py +3 -4
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symbolicai.egg-info/PKG-INFO +2 -1
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symbolicai.egg-info/SOURCES.txt +3 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symbolicai.egg-info/requires.txt +1 -0
- symbolicai-1.5.0/uv.lock +9197 -0
- symbolicai-1.4.0/docs/source/ENGINES/scrape_engine.md +0 -43
- symbolicai-1.4.0/uv.lock +0 -7700
- {symbolicai-1.4.0 → symbolicai-1.5.0}/.gitbook.yaml +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/.github/FUNDING.yml +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/.gitignore +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/.symai/symsh.config.json +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/AGENTS.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/CITATION.cff +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/Dockerfile +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/LICENSE +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/MANIFEST.in +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/README.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/app.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/banner.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/cat.jpg +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/cat.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/contract_flow.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/img1.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/img10.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/img2.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/img3.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/img4.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/img5.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/img6.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/img7.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/img8.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/img9.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/preview.gif +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/screen1.jpeg +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/symai_logo.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/symsh.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/vid1.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/vid2.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/vid3.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/vid4.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/vid5.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/images/vid6.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/results/news.html +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/results/news.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/assets/results/news_prev.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/bin/install.ps1 +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/bin/install.sh +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/build.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docker-compose.yml +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/clip_engine.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/custom_engine.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/drawing_engine.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/file_engine.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/indexing_engine.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/local_engine.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/neurosymbolic_engine.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/ocr_engine.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/speech_to_text_engine.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/symbolic_engine.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/FEATURES/contracts.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/FEATURES/error_handling.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/FEATURES/expressions.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/FEATURES/import.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/FEATURES/operations.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/FEATURES/primitives.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/INSTALLATION.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/INTRODUCTION.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/QUICKSTART.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/SUMMARY.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/TOOLS/chatbot.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/TOOLS/packages.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/TOOLS/shell.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/chatbot.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/context.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/data_query.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/TUTORIALS/video_tutorials.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/environment.yml +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/examples/contracts.ipynb +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/examples/primitives.ipynb +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/icon_converter.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/installer.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/Basics.ipynb +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/ChatBot.ipynb +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/Conversation.ipynb +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/Indexer.ipynb +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/News.ipynb +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/Queries.ipynb +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/TTS_Persona.ipynb +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/Lean engine.png +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/a_star.txt +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/abstract.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/audio.mp3 +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/dbpedia_samples.jsonl +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/dbpedia_samples_prepared_train.jsonl +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/dbpedia_samples_prepared_valid.jsonl +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/demo.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/demo_strategy.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/docs.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/einsteins_puzzle.txt +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/file.json +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/lean.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/news.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/paper.pdf +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/paper.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/legacy/notebooks/examples/sql.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/public/eai.svg +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/pytest.ini +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/ruff.toml +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/setup.cfg +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/setup.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/TERMS_OF_SERVICE.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/base.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/drawing/engine_bfl.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/drawing/engine_gpt_image.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/embedding/engine_llama_cpp.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/embedding/engine_openai.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/execute/engine_python.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/files/engine_io.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/imagecaptioning/engine_blip2.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/imagecaptioning/engine_llavacpp_client.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/index/engine_pinecone.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/index/engine_qdrant.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/index/engine_vectordb.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/lean/engine_lean4.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_chat.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_anthropic_claudeX_reasoning.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_cerebras.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_deepseekX_reasoning.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_google_geminiX_reasoning.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_groq.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_huggingface.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_llama_cpp.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_openai_gptX_chat.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_openai_gptX_reasoning.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/neurosymbolic/engine_openai_responses.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/ocr/engine_apilayer.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/output/engine_stdout.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_openai.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_perplexity.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/search/engine_serpapi.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/speech_to_text/engine_local_whisper.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/symbolic/engine_wolframalpha.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/text_to_speech/engine_openai.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/text_vision/engine_clip.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/userinput/engine_console.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/mixin/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/mixin/anthropic.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/mixin/cerebras.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/mixin/deepseek.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/mixin/google.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/mixin/groq.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/mixin/openai.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/settings.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/chat.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/collect/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/collect/dynamic.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/collect/pipeline.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/collect/stats.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/constraints.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/context.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/core.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/core_ext.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/endpoints/__init__py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/endpoints/api.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/exceptions.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/.DS_Store +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/api_builder.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/arxiv_pdf_parser.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/bibtex_parser.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/conversation.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/document.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/file_merger.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/graph.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/html_style_template.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/blip_2.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/clip.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/console.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/dall_e.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/file.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/flux.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/gpt_image.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/input.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/llava.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/local_search.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/naive_scrape.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/naive_vectordb.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/ocr.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/openai_search.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/perplexity.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/pinecone.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/python.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/serpapi.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/terminal.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/tts.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/whisper.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/interfaces/wolframalpha.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/metrics/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/metrics/similarity.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/os_command.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/packages/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/packages/symdev.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/packages/sympkg.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/packages/symrun.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/repo_cloner.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/seo_query_optimizer.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/solver.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/summarizer.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/taypan_interpreter.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/extended/vectordb.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/formatter/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/formatter/emoji.pytxt +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/formatter/formatter.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/formatter/regex.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/imports.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/interfaces.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/memory.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/menu/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/menu/screen.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/misc/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/misc/console.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/misc/loader.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/models/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/models/base.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/models/errors.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/ops/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/ops/measures.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/ops/primitives.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/post_processors.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/pre_processors.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/processor.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/prompts.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/server/__init__.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/server/huggingface_server.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/server/llama_cpp_server.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/server/qdrant_server.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/shell.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/shellsv.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/strategy.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/symbol.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/symsh.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symai/utils.py +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symbolicai.egg-info/dependency_links.txt +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symbolicai.egg-info/entry_points.txt +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/symbolicai.egg-info/top_level.txt +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/tests/README.md +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/tests/data/audio.mp3 +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/tests/data/pg1727.txt +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/tests/data/symmetry_breaking.pdf +0 -0
- {symbolicai-1.4.0 → symbolicai-1.5.0}/trusted_repos.yml +0 -0
{symbolicai-1.4.0 → symbolicai-1.5.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: symbolicai
-Version: 1.4.0
+Version: 1.5.0
 Summary: A Neurosymbolic Perspective on Large Language Models
 Author-email: Marius-Constantin Dinu <marius@extensity.ai>, Leoveanu-Condrei Claudiu <leo@extensity.ai>
 License: BSD 3-Clause License
@@ -113,6 +113,7 @@ Requires-Dist: openai-whisper>=20240930; extra == "whisper"
 Requires-Dist: numba>=0.62.1; extra == "whisper"
 Requires-Dist: llvmlite>=0.45.1; extra == "whisper"
 Provides-Extra: search
+Requires-Dist: firecrawl-py>=4.12.0; extra == "search"
 Requires-Dist: parallel-web>=0.3.3; extra == "search"
 Provides-Extra: serpapi
 Requires-Dist: google_search_results>=2.4.2; extra == "serpapi"
symbolicai-1.5.0/docs/source/ENGINES/scrape_engine.md (new file)

@@ -0,0 +1,143 @@
+# Scrape Engine
+
+## Naive Scrape
+
+To access data from the web, we can use the `naive_scrape` interface. The underlying engine is very lightweight and can be used to scrape data from websites. It is based on the `requests` library, with `trafilatura` for output formatting and `bs4` for HTML parsing. `trafilatura` currently supports the following output formats: `json`, `csv`, `html`, `markdown`, `text`, and `xml`.
+
+```python
+from symai.interfaces import Interface
+
+scraper = Interface("naive_scrape")
+url = "https://docs.astral.sh/uv/guides/scripts/#next-steps"
+res = scraper(url)
+```
+
+## Parallel (Parallel.ai)
+
+The Parallel.ai integration routes scrape calls through the official `parallel-web` SDK and can handle PDFs, JavaScript-heavy feeds, and standard HTML pages in the same workflow. Instantiate the Parallel interface and call `.scrape(...)` with the target URL. The engine detects scrape requests automatically whenever a URL is supplied.
+
+```python
+from symai.extended import Interface
+
+scraper = Interface("parallel")
+article = scraper.scrape(
+    "https://trafilatura.readthedocs.io/en/latest/crawls.html",
+    full_content=True,  # optional: request full document text
+    excerpts=True,      # optional: default True, retain excerpt snippets
+    objective="Summarize crawl guidance for internal notes."
+)
+print(str(article))
+```
+
+Configuration requires a Parallel API key and the `parallel` model selection. Add the following to your settings:
+
+```bash
+{
+    …
+    "SEARCH_ENGINE_API_KEY": "…",
+    "SEARCH_ENGINE_MODEL": "parallel"
+    …
+}
+```
+
+When invoked with a URL, the engine hits Parallel's Extract API and returns an `ExtractResult`. The result string joins excerpts, or the full content if requested. Because processing is offloaded to Parallel's hosted infrastructure, the engine remains reliable on dynamic pages that the naive scraper cannot render. Install the dependency with `pip install parallel-web` before enabling this engine.
+
+## Firecrawl
+
+Firecrawl.dev specializes in reliable web scraping with automatic handling of JavaScript-rendered content, proxies, and anti-bot mechanisms. It converts web pages into clean formats suitable for LLM consumption and supports advanced features like actions, caching, and location-based scraping.
+
+### Examples
+
+```python
+from symai.extended import Interface
+
+scraper = Interface("firecrawl")
+
+# Example 1: Basic webpage scraping
+content = scraper.scrape(
+    "https://docs.firecrawl.dev/introduction",
+    formats=["markdown"]
+)
+print(content)
+
+# Example 2: PDF scraping with content extraction and trimming
+pdf_full = scraper.scrape(
+    "https://pmc.ncbi.nlm.nih.gov/articles/PMC7231600",
+    only_main_content=True,
+    formats=["markdown"],
+    proxy="auto"
+)
+# Trim locally if needed
+pdf_trimmed = str(pdf_full)[:100]
+
+# Note: JS-heavy sites like Twitter/LinkedIn are currently not fully supported.
+# They typically return 403 Forbidden errors (may vary by subscription tier).
+```
+
+### Configuration
+
+Enable the engine by configuring Firecrawl credentials:
+
+```bash
+{
+    "SEARCH_ENGINE_API_KEY": "fc-your-api-key",
+    "SEARCH_ENGINE_MODEL": "firecrawl"
+}
+```
+
+### JSON Schema Extraction
+
+Firecrawl supports structured data extraction using JSON schemas. This is useful for extracting specific fields from web pages using LLM-powered extraction:
+
+```python
+from pydantic import Field
+from symai.extended import Interface
+from symai.models import LLMDataModel
+
+class MetadataModel(LLMDataModel):
+    """Bibliographic metadata extracted from a source document."""
+    title: str = Field(description="Title of the source.")
+    year: str = Field(description="Publication year (4 digits) or Unknown.")
+    authors: list[str] = Field(default_factory=list, description="List of authors.")
+    doi: str | None = Field(default=None, description="DOI if available.")
+
+# Build JSON format config from Pydantic schema
+schema = MetadataModel.model_json_schema()
+json_format = {
+    "type": "json",
+    "prompt": "Extract bibliographic metadata from this academic paper.",
+    "schema": schema,
+}
+
+scraper = Interface("firecrawl")
+result = scraper.scrape(
+    "https://journals.physiology.org/doi/full/10.1152/ajpregu.00051.2002",
+    formats=[json_format],
+    proxy="auto"
+)
+
+# Access extracted data as dict
+extracted = result.raw["json"]
+metadata = MetadataModel(**extracted)
+print(metadata.model_dump())
+
+# Or as JSON string
+print(str(result))
+```
+
+### Supported Parameters
+
+The engine supports many parameters (passed as kwargs). Common ones include:
+- **formats**: Output formats (`["markdown"]`, `["html"]`, `["rawHtml"]`)
+- **only_main_content**: Extract main content only (boolean)
+- **proxy**: Proxy mode (`"basic"`, `"stealth"`, `"auto"`)
+- **location**: Geographic location object with optional country and languages
+  - Example: `{"country": "US"}` or `{"country": "RO", "languages": ["ro"]}`
+- **maxAge**: Cache duration in seconds (integer)
+- **storeInCache**: Enable caching (boolean)
+- **actions**: Page interactions before scraping (list of action objects)
+  - Example: `[{"type": "wait", "milliseconds": 2000}]`
+  - Example: `[{"type": "click", "selector": ".button"}]`
+  - Example: `[{"type": "scroll", "direction": "down", "amount": 500}]`
+
+Check the Firecrawl v2 API documentation for the complete list of available parameters and action types.
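The new scrape documentation lists `actions`, `location`, and the caching knobs individually but never combines them. A minimal sketch, assuming these kwargs pass straight through to Firecrawl's v2 scrape endpoint as the documentation above describes; the target URL and the specific action values are illustrative, not taken from the package:

```python
# Hedged sketch combining the documented Firecrawl scrape kwargs.
from symai.extended import Interface

scraper = Interface("firecrawl")
page = scraper.scrape(
    "https://example.com/dashboard",   # hypothetical target URL
    formats=["markdown"],
    only_main_content=True,
    location={"country": "US"},        # geographic hint, per the parameter list
    maxAge=3600,                       # accept a cached copy up to an hour old
    storeInCache=True,
    actions=[
        {"type": "wait", "milliseconds": 2000},                  # let dynamic content settle
        {"type": "scroll", "direction": "down", "amount": 500},  # reveal lazy-loaded sections
    ],
)
print(str(page))
```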
{symbolicai-1.4.0 → symbolicai-1.5.0}/docs/source/ENGINES/search_engine.md

@@ -152,3 +152,75 @@ Here's how to configure the OpenAI search engine:
 ```
 
 This engine calls the OpenAI Responses API under the hood. When you target a reasoning-capable model, pass a `reasoning` dictionary matching the Responses payload schema (for example `{"effort": "low", "summary": "auto"}`). If omitted, the engine falls back to the default effort/summary settings shown above.
+
+## Firecrawl
+Firecrawl.dev provides web scraping and search capabilities with built-in handling of dynamic JavaScript content and anti-bot mechanisms. The engine converts web pages into clean markdown and can perform web searches across multiple sources with advanced filtering and content extraction.
+
+### Comprehensive Search Example
+
+```python
+from symai.extended import Interface
+
+engine = Interface("firecrawl")
+
+# Example 1: Location-aware search with language, scraping, and citations
+result = engine.search(
+    "who is nicusor dan",
+    limit=5,
+    location="Romania",
+    lang="ro",
+    sources=["web"],
+    formats=["markdown"],
+    only_main_content=True,
+    proxy="stealth"
+)
+
+# Access structured citations (similar to parallel.ai)
+citations = result.get_citations()
+for citation in citations:
+    print(f"[{citation.id}] {citation.title}: {citation.url}")
+
+# Example 2: Domain-filtered search with character limits
+domains = ["arxiv.org", "nature.com"]
+filters = " OR ".join(f"site:{domain}" for domain in domains)
+query = f"({filters}) what is thermodynamic computing"
+
+result = engine.search(
+    query,
+    limit=10,
+    max_chars_per_result=500,
+    categories=["research"],
+    formats=["markdown"],
+    proxy="basic"
+)
+print(result)
+```
+
+### Configuration
+
+Enable the engine by configuring Firecrawl credentials:
+
+```bash
+{
+    "SEARCH_ENGINE_API_KEY": "fc-your-api-key",
+    "SEARCH_ENGINE_MODEL": "firecrawl"
+}
+```
+
+### Supported Parameters
+
+The engine supports many parameters (passed as kwargs). Common ones include:
+- **limit**: Max number of results
+- **location**: Country string for search (e.g., "Romania", "Germany")
+- **lang**: Language code for search (e.g., "ro", "es"); treated as a hint, not enforced
+- **sources**: List of sources (`["web"]`, `["news"]`, `["images"]`)
+- **categories**: Content types (`["research"]`, `["github"]`, `["pdf"]`)
+- **tbs**: Time-based filter (e.g., `"qdr:d"` for the past day)
+- **formats**: Output formats for scraped content (`["markdown"]`, `["html"]`)
+- **only_main_content**: Extract main content only when scraping (boolean)
+- **max_chars_per_result**: Truncate results locally (integer)
+- **proxy**: Proxy mode for scraping (`"basic"`, `"stealth"`, `"auto"`)
+- **scrape_location**: Location object for scraping with optional country and languages
+  - Example: `{"country": "US"}` or `{"country": "RO", "languages": ["ro"]}`
+
+Check the Firecrawl v2 API documentation for the complete list of available parameters.
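The `tbs` and `sources` options in this hunk pair naturally with the citation accessor shown in the comprehensive example. A hedged sketch of a recency-filtered news search, reusing only parameters and accessors the new documentation itself names (`limit`, `sources`, `tbs`, `formats`, `get_citations`); the query string is illustrative:

```python
# Hedged sketch: restrict a Firecrawl search to news from the past day.
from symai.extended import Interface

engine = Interface("firecrawl")
news = engine.search(
    "symbolic AI frameworks",   # illustrative query
    limit=5,
    sources=["news"],
    tbs="qdr:d",                # time-based filter: past day
    formats=["markdown"],
)
for citation in news.get_citations():
    print(f"[{citation.id}] {citation.title}: {citation.url}")
```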
{symbolicai-1.4.0 → symbolicai-1.5.0}/pyproject.toml

@@ -78,7 +78,7 @@ scrape = ["beautifulsoup4>=4.12.3", "trafilatura>=2.0.0", "pdfminer.six",
 llama_cpp = ["llama-cpp-python[server]>=0.3.7"] # handle separately since this dependency may not compile and require special maintenance
 wolframalpha = ["wolframalpha>=5.0.0"]
 whisper = ["openai-whisper>=20240930", "numba>=0.62.1", "llvmlite>=0.45.1"]
-search = ["parallel-web>=0.3.3"]
+search = ["firecrawl-py>=4.12.0", "parallel-web>=0.3.3"]
 serpapi = ["google_search_results>=2.4.2"]
 services = ["fastapi>=0.110.0", "redis>=5.0.2", "uvicorn>=0.27.1"]
 solver = ["z3-solver>=4.12.6.0"]
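With this change, `pip install "symbolicai[search]"` pulls in both search SDKs. A small sanity check sketch, assuming the `firecrawl-py` and `parallel-web` distributions expose the import names `firecrawl` and `parallel` (an assumption; the diff does not confirm the module names):

```python
# Check whether the optional search SDKs are importable in this environment.
import importlib.util

for module in ("firecrawl", "parallel"):  # assumed import names, see lead-in
    found = importlib.util.find_spec(module) is not None
    print(f"{module}: {'installed' if found else 'missing'}")
```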
{symbolicai-1.4.0 → symbolicai-1.5.0}/symai/backend/engines/scrape/engine_requests.py

@@ -9,6 +9,7 @@ service disruption.
 
 import io
 import logging
+import random
 import re
 from typing import Any, ClassVar
 from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
@@ -17,7 +18,9 @@ import requests
 import trafilatura
 from bs4 import BeautifulSoup
 from pdfminer.high_level import extract_text
+from requests.adapters import HTTPAdapter
 from requests.structures import CaseInsensitiveDict
+from urllib3.util.retry import Retry
 
 from ....symbol import Result
 from ....utils import UserMessage
@@ -80,24 +83,49 @@ class RequestsEngine(Engine):
         "none": "None",
     }
 
-    def __init__(self, timeout=15, verify_ssl=True, user_agent=None):
+    USER_AGENT_POOL: ClassVar[list[str]] = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0",
+        "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
+    ]
+
+    def __init__(self, timeout=15, verify_ssl=True, user_agent=None, retries=3, backoff_factor=0.5, retry_status_codes=(500, 502, 503, 504)):
         """
         Args:
             timeout: Seconds to wait for network operations before aborting.
             verify_ssl: Toggle for TLS certificate verification.
-            user_agent: Optional override for
+            user_agent: Optional override for user agent rotation.
+            retries: Number of retries for failed requests (default: 3).
+            backoff_factor: Multiplier for exponential backoff (default: 0.5).
+            retry_status_codes: HTTP status codes to retry on (default: 500, 502, 503, 504).
         """
         super().__init__()
         self.timeout = timeout
         self.verify_ssl = verify_ssl
         self.name = self.__class__.__name__
-
-        headers = dict(self.DEFAULT_HEADERS)
-        if user_agent:
-            headers["User-Agent"] = user_agent
+        self._user_agent_override = user_agent
 
         self.session = requests.Session()
-        self.session.headers.update(headers)
+        self.session.headers.update({k: v for k, v in self.DEFAULT_HEADERS.items() if k != "User-Agent"})
+
+        retry_strategy = Retry(
+            total=retries,
+            backoff_factor=backoff_factor,
+            status_forcelist=retry_status_codes,
+            allowed_methods=["GET", "HEAD"],
+        )
+        adapter = HTTPAdapter(max_retries=retry_strategy)
+        self.session.mount("http://", adapter)
+        self.session.mount("https://", adapter)
+
+    def _get_user_agent(self) -> str:
+        """Return user agent: override if set, otherwise random from pool."""
+        return self._user_agent_override or random.choice(self.USER_AGENT_POOL)
 
     def _maybe_set_bypass_cookies(self, url: str):
         netloc = urlparse(url).hostname
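The retry wiring in this hunk is stock `requests`/`urllib3` machinery. Reduced to a standalone sketch so the moving parts are visible outside the diff; the example URL is illustrative:

```python
# Standalone sketch of the retry/backoff + user-agent rotation pattern above.
import random

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

USER_AGENT_POOL = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
]

session = requests.Session()
retry = Retry(
    total=3,                                 # up to three retries per request
    backoff_factor=0.5,                      # exponential sleep between attempts
    status_forcelist=(500, 502, 503, 504),   # only retry transient server errors
    allowed_methods=["GET", "HEAD"],         # never replay non-idempotent verbs
)
adapter = HTTPAdapter(max_retries=retry)
session.mount("http://", adapter)
session.mount("https://", adapter)

resp = session.get(
    "https://example.com",
    timeout=15,
    headers={"User-Agent": random.choice(USER_AGENT_POOL)},  # per-request rotation
)
print(resp.status_code)
```

With these settings the adapter sleeps roughly `backoff_factor * 2**(n-1)` seconds before the n-th retry (the exact schedule differs slightly across urllib3 versions), and only idempotent GET/HEAD requests are ever replayed.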
@@ -232,7 +260,7 @@ class RequestsEngine(Engine):
         # Avoid loops
         if target == resp.url:
             return resp
-        return self.session.get(target, timeout=timeout, allow_redirects=True)
+        return self.session.get(target, timeout=timeout, allow_redirects=True, headers={"User-Agent": self._get_user_agent()})
 
     def _fetch_with_playwright(
         self,
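The guard in this hunk is easy to misread in diff form. Reduced to a standalone helper, it follows one resolved redirect target and returns the current response when the target points back at itself; the helper name is hypothetical:

```python
# Illustrative reduction of the redirect loop guard above.
import requests

def follow_once(session: requests.Session, resp: requests.Response,
                target: str, timeout: float = 15.0) -> requests.Response:
    if target == resp.url:  # the target resolves to the page we already have
        return resp
    return session.get(target, timeout=timeout, allow_redirects=True)
```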
@@ -259,7 +287,7 @@ class RequestsEngine(Engine):
 
         timeout_seconds = timeout if timeout is not None else self.timeout
         timeout_ms = max(int(timeout_seconds * 1000), 0)
-        user_agent = self.
+        user_agent = self._get_user_agent()
 
         parsed = urlparse(url)
         hostname = parsed.hostname or ""
@@ -348,7 +376,8 @@ class RequestsEngine(Engine):
             )
         else:
             resp = self.session.get(
-                clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl
+                clean_url, timeout=self.timeout, allow_redirects=True, verify=self.verify_ssl,
+                headers={"User-Agent": self._get_user_agent()}
             )
         resp.raise_for_status()
 
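This last hunk relies on a documented `requests` behavior: headers passed per request are merged over the session headers, with the per-request value winning on conflict. That is what lets each call carry a freshly rotated agent while shared defaults stay on the session. A quick demonstration (httpbin.org used for illustration):

```python
# Per-request headers override session headers in requests.
import requests

session = requests.Session()
session.headers.update({"Accept": "text/html"})  # shared defaults stay on the session

r1 = session.get("https://httpbin.org/headers", headers={"User-Agent": "agent-one"})
r2 = session.get("https://httpbin.org/headers", headers={"User-Agent": "agent-two"})
print(r1.json()["headers"]["User-Agent"])  # agent-one
print(r2.json()["headers"]["User-Agent"])  # agent-two
```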
symbolicai-1.5.0/symai/backend/engines/search/__init__.py (new file)

@@ -0,0 +1,13 @@
+from .engine_firecrawl import FirecrawlEngine
+from .engine_parallel import ParallelEngine
+
+SEARCH_ENGINE_MAPPING = {
+    "firecrawl": FirecrawlEngine,
+    "parallel": ParallelEngine,
+}
+
+__all__ = [
+    "SEARCH_ENGINE_MAPPING",
+    "FirecrawlEngine",
+    "ParallelEngine",
+]
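The new mapping gives downstream code a single lookup point for the `SEARCH_ENGINE_MODEL` setting. A hedged sketch of such a resolver; the function is illustrative and stands in for however `symai` dispatches internally:

```python
# Hypothetical resolver from a configured model name to an engine class.
from symai.backend.engines.search import SEARCH_ENGINE_MAPPING

def resolve_search_engine(model_name: str):
    try:
        return SEARCH_ENGINE_MAPPING[model_name]
    except KeyError:
        supported = ", ".join(sorted(SEARCH_ENGINE_MAPPING))
        raise ValueError(f"Unknown search engine '{model_name}'; expected one of: {supported}")

engine_cls = resolve_search_engine("firecrawl")  # -> FirecrawlEngine
```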