webscout 8.2.9-py3-none-any.whl → 2026.1.19-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webscout/AIauto.py +524 -251
- webscout/AIbase.py +247 -319
- webscout/AIutel.py +68 -703
- webscout/Bard.py +1072 -1026
- webscout/Extra/GitToolkit/__init__.py +10 -10
- webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
- webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
- webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
- webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
- webscout/Extra/GitToolkit/gitapi/search.py +162 -0
- webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
- webscout/Extra/GitToolkit/gitapi/user.py +128 -96
- webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
- webscout/Extra/YTToolkit/README.md +443 -375
- webscout/Extra/YTToolkit/YTdownloader.py +953 -957
- webscout/Extra/YTToolkit/__init__.py +3 -3
- webscout/Extra/YTToolkit/transcriber.py +595 -476
- webscout/Extra/YTToolkit/ytapi/README.md +230 -44
- webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
- webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
- webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
- webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
- webscout/Extra/YTToolkit/ytapi/extras.py +178 -118
- webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
- webscout/Extra/YTToolkit/ytapi/https.py +89 -88
- webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
- webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
- webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
- webscout/Extra/YTToolkit/ytapi/query.py +143 -40
- webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
- webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
- webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
- webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
- webscout/Extra/YTToolkit/ytapi/video.py +403 -232
- webscout/Extra/__init__.py +2 -3
- webscout/Extra/gguf.py +1298 -684
- webscout/Extra/tempmail/README.md +487 -487
- webscout/Extra/tempmail/__init__.py +28 -28
- webscout/Extra/tempmail/async_utils.py +143 -141
- webscout/Extra/tempmail/base.py +172 -161
- webscout/Extra/tempmail/cli.py +191 -187
- webscout/Extra/tempmail/emailnator.py +88 -84
- webscout/Extra/tempmail/mail_tm.py +378 -361
- webscout/Extra/tempmail/temp_mail_io.py +304 -292
- webscout/Extra/weather.py +196 -194
- webscout/Extra/weather_ascii.py +17 -15
- webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
- webscout/Provider/AISEARCH/Perplexity.py +292 -333
- webscout/Provider/AISEARCH/README.md +106 -279
- webscout/Provider/AISEARCH/__init__.py +16 -9
- webscout/Provider/AISEARCH/brave_search.py +298 -0
- webscout/Provider/AISEARCH/iask_search.py +357 -410
- webscout/Provider/AISEARCH/monica_search.py +200 -220
- webscout/Provider/AISEARCH/webpilotai_search.py +242 -255
- webscout/Provider/Algion.py +413 -0
- webscout/Provider/Andi.py +74 -69
- webscout/Provider/Apriel.py +313 -0
- webscout/Provider/Ayle.py +323 -0
- webscout/Provider/ChatSandbox.py +329 -342
- webscout/Provider/ClaudeOnline.py +365 -0
- webscout/Provider/Cohere.py +232 -208
- webscout/Provider/DeepAI.py +367 -0
- webscout/Provider/Deepinfra.py +467 -340
- webscout/Provider/EssentialAI.py +217 -0
- webscout/Provider/ExaAI.py +274 -261
- webscout/Provider/Gemini.py +175 -169
- webscout/Provider/GithubChat.py +385 -369
- webscout/Provider/Gradient.py +286 -0
- webscout/Provider/Groq.py +556 -801
- webscout/Provider/HadadXYZ.py +323 -0
- webscout/Provider/HeckAI.py +392 -375
- webscout/Provider/HuggingFace.py +387 -0
- webscout/Provider/IBM.py +340 -0
- webscout/Provider/Jadve.py +317 -291
- webscout/Provider/K2Think.py +306 -0
- webscout/Provider/Koboldai.py +221 -384
- webscout/Provider/Netwrck.py +273 -270
- webscout/Provider/Nvidia.py +310 -0
- webscout/Provider/OPENAI/DeepAI.py +489 -0
- webscout/Provider/OPENAI/K2Think.py +423 -0
- webscout/Provider/OPENAI/PI.py +463 -0
- webscout/Provider/OPENAI/README.md +890 -952
- webscout/Provider/OPENAI/TogetherAI.py +405 -0
- webscout/Provider/OPENAI/TwoAI.py +255 -357
- webscout/Provider/OPENAI/__init__.py +148 -40
- webscout/Provider/OPENAI/ai4chat.py +348 -293
- webscout/Provider/OPENAI/akashgpt.py +436 -0
- webscout/Provider/OPENAI/algion.py +303 -0
- webscout/Provider/OPENAI/{exachat.py → ayle.py} +365 -444
- webscout/Provider/OPENAI/base.py +253 -249
- webscout/Provider/OPENAI/cerebras.py +296 -0
- webscout/Provider/OPENAI/chatgpt.py +870 -556
- webscout/Provider/OPENAI/chatsandbox.py +233 -173
- webscout/Provider/OPENAI/deepinfra.py +403 -322
- webscout/Provider/OPENAI/e2b.py +2370 -1414
- webscout/Provider/OPENAI/elmo.py +278 -0
- webscout/Provider/OPENAI/exaai.py +452 -417
- webscout/Provider/OPENAI/freeassist.py +446 -0
- webscout/Provider/OPENAI/gradient.py +448 -0
- webscout/Provider/OPENAI/groq.py +380 -364
- webscout/Provider/OPENAI/hadadxyz.py +292 -0
- webscout/Provider/OPENAI/heckai.py +333 -308
- webscout/Provider/OPENAI/huggingface.py +321 -0
- webscout/Provider/OPENAI/ibm.py +425 -0
- webscout/Provider/OPENAI/llmchat.py +253 -0
- webscout/Provider/OPENAI/llmchatco.py +378 -335
- webscout/Provider/OPENAI/meta.py +541 -0
- webscout/Provider/OPENAI/netwrck.py +374 -357
- webscout/Provider/OPENAI/nvidia.py +317 -0
- webscout/Provider/OPENAI/oivscode.py +348 -287
- webscout/Provider/OPENAI/openrouter.py +328 -0
- webscout/Provider/OPENAI/pydantic_imports.py +1 -172
- webscout/Provider/OPENAI/sambanova.py +397 -0
- webscout/Provider/OPENAI/sonus.py +305 -304
- webscout/Provider/OPENAI/textpollinations.py +370 -339
- webscout/Provider/OPENAI/toolbaz.py +375 -413
- webscout/Provider/OPENAI/typefully.py +419 -355
- webscout/Provider/OPENAI/typliai.py +279 -0
- webscout/Provider/OPENAI/utils.py +314 -318
- webscout/Provider/OPENAI/wisecat.py +359 -387
- webscout/Provider/OPENAI/writecream.py +185 -163
- webscout/Provider/OPENAI/x0gpt.py +462 -365
- webscout/Provider/OPENAI/zenmux.py +380 -0
- webscout/Provider/OpenRouter.py +386 -0
- webscout/Provider/Openai.py +337 -496
- webscout/Provider/PI.py +443 -429
- webscout/Provider/QwenLM.py +346 -254
- webscout/Provider/STT/__init__.py +28 -0
- webscout/Provider/STT/base.py +303 -0
- webscout/Provider/STT/elevenlabs.py +264 -0
- webscout/Provider/Sambanova.py +317 -0
- webscout/Provider/TTI/README.md +69 -82
- webscout/Provider/TTI/__init__.py +37 -7
- webscout/Provider/TTI/base.py +147 -64
- webscout/Provider/TTI/claudeonline.py +393 -0
- webscout/Provider/TTI/magicstudio.py +292 -201
- webscout/Provider/TTI/miragic.py +180 -0
- webscout/Provider/TTI/pollinations.py +331 -221
- webscout/Provider/TTI/together.py +334 -0
- webscout/Provider/TTI/utils.py +14 -11
- webscout/Provider/TTS/README.md +186 -192
- webscout/Provider/TTS/__init__.py +43 -10
- webscout/Provider/TTS/base.py +523 -159
- webscout/Provider/TTS/deepgram.py +286 -156
- webscout/Provider/TTS/elevenlabs.py +189 -111
- webscout/Provider/TTS/freetts.py +218 -0
- webscout/Provider/TTS/murfai.py +288 -113
- webscout/Provider/TTS/openai_fm.py +364 -129
- webscout/Provider/TTS/parler.py +203 -111
- webscout/Provider/TTS/qwen.py +334 -0
- webscout/Provider/TTS/sherpa.py +286 -0
- webscout/Provider/TTS/speechma.py +693 -580
- webscout/Provider/TTS/streamElements.py +275 -333
- webscout/Provider/TTS/utils.py +280 -280
- webscout/Provider/TextPollinationsAI.py +331 -308
- webscout/Provider/TogetherAI.py +450 -0
- webscout/Provider/TwoAI.py +309 -475
- webscout/Provider/TypliAI.py +311 -305
- webscout/Provider/UNFINISHED/ChatHub.py +219 -209
- webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +331 -326
- webscout/Provider/{GizAI.py → UNFINISHED/GizAI.py} +300 -295
- webscout/Provider/{Marcus.py → UNFINISHED/Marcus.py} +218 -198
- webscout/Provider/UNFINISHED/Qodo.py +481 -0
- webscout/Provider/{MCPCore.py → UNFINISHED/XenAI.py} +330 -315
- webscout/Provider/UNFINISHED/Youchat.py +347 -330
- webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
- webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
- webscout/Provider/UNFINISHED/liner.py +342 -0
- webscout/Provider/UNFINISHED/liner_api_request.py +246 -263
- webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +231 -224
- webscout/Provider/WiseCat.py +256 -233
- webscout/Provider/WrDoChat.py +390 -370
- webscout/Provider/__init__.py +115 -174
- webscout/Provider/ai4chat.py +181 -174
- webscout/Provider/akashgpt.py +330 -335
- webscout/Provider/cerebras.py +397 -290
- webscout/Provider/cleeai.py +236 -213
- webscout/Provider/elmo.py +291 -283
- webscout/Provider/geminiapi.py +343 -208
- webscout/Provider/julius.py +245 -223
- webscout/Provider/learnfastai.py +333 -325
- webscout/Provider/llama3mitril.py +230 -215
- webscout/Provider/llmchat.py +308 -258
- webscout/Provider/llmchatco.py +321 -306
- webscout/Provider/meta.py +996 -801
- webscout/Provider/oivscode.py +332 -309
- webscout/Provider/searchchat.py +316 -292
- webscout/Provider/sonus.py +264 -258
- webscout/Provider/toolbaz.py +359 -353
- webscout/Provider/turboseek.py +332 -266
- webscout/Provider/typefully.py +262 -202
- webscout/Provider/x0gpt.py +332 -299
- webscout/__init__.py +31 -39
- webscout/__main__.py +5 -5
- webscout/cli.py +585 -524
- webscout/client.py +1497 -70
- webscout/conversation.py +140 -436
- webscout/exceptions.py +383 -362
- webscout/litagent/__init__.py +29 -29
- webscout/litagent/agent.py +492 -455
- webscout/litagent/constants.py +60 -60
- webscout/models.py +505 -181
- webscout/optimizers.py +74 -420
- webscout/prompt_manager.py +376 -288
- webscout/sanitize.py +1514 -0
- webscout/scout/README.md +452 -404
- webscout/scout/__init__.py +8 -8
- webscout/scout/core/__init__.py +7 -7
- webscout/scout/core/crawler.py +330 -210
- webscout/scout/core/scout.py +800 -607
- webscout/scout/core/search_result.py +51 -96
- webscout/scout/core/text_analyzer.py +64 -63
- webscout/scout/core/text_utils.py +412 -277
- webscout/scout/core/web_analyzer.py +54 -52
- webscout/scout/element.py +872 -478
- webscout/scout/parsers/__init__.py +70 -69
- webscout/scout/parsers/html5lib_parser.py +182 -172
- webscout/scout/parsers/html_parser.py +238 -236
- webscout/scout/parsers/lxml_parser.py +203 -178
- webscout/scout/utils.py +38 -37
- webscout/search/__init__.py +47 -0
- webscout/search/base.py +201 -0
- webscout/search/bing_main.py +45 -0
- webscout/search/brave_main.py +92 -0
- webscout/search/duckduckgo_main.py +57 -0
- webscout/search/engines/__init__.py +127 -0
- webscout/search/engines/bing/__init__.py +15 -0
- webscout/search/engines/bing/base.py +35 -0
- webscout/search/engines/bing/images.py +114 -0
- webscout/search/engines/bing/news.py +96 -0
- webscout/search/engines/bing/suggestions.py +36 -0
- webscout/search/engines/bing/text.py +109 -0
- webscout/search/engines/brave/__init__.py +19 -0
- webscout/search/engines/brave/base.py +47 -0
- webscout/search/engines/brave/images.py +213 -0
- webscout/search/engines/brave/news.py +353 -0
- webscout/search/engines/brave/suggestions.py +318 -0
- webscout/search/engines/brave/text.py +167 -0
- webscout/search/engines/brave/videos.py +364 -0
- webscout/search/engines/duckduckgo/__init__.py +25 -0
- webscout/search/engines/duckduckgo/answers.py +80 -0
- webscout/search/engines/duckduckgo/base.py +189 -0
- webscout/search/engines/duckduckgo/images.py +100 -0
- webscout/search/engines/duckduckgo/maps.py +183 -0
- webscout/search/engines/duckduckgo/news.py +70 -0
- webscout/search/engines/duckduckgo/suggestions.py +22 -0
- webscout/search/engines/duckduckgo/text.py +221 -0
- webscout/search/engines/duckduckgo/translate.py +48 -0
- webscout/search/engines/duckduckgo/videos.py +80 -0
- webscout/search/engines/duckduckgo/weather.py +84 -0
- webscout/search/engines/mojeek.py +61 -0
- webscout/search/engines/wikipedia.py +77 -0
- webscout/search/engines/yahoo/__init__.py +41 -0
- webscout/search/engines/yahoo/answers.py +19 -0
- webscout/search/engines/yahoo/base.py +34 -0
- webscout/search/engines/yahoo/images.py +323 -0
- webscout/search/engines/yahoo/maps.py +19 -0
- webscout/search/engines/yahoo/news.py +258 -0
- webscout/search/engines/yahoo/suggestions.py +140 -0
- webscout/search/engines/yahoo/text.py +273 -0
- webscout/search/engines/yahoo/translate.py +19 -0
- webscout/search/engines/yahoo/videos.py +302 -0
- webscout/search/engines/yahoo/weather.py +220 -0
- webscout/search/engines/yandex.py +67 -0
- webscout/search/engines/yep/__init__.py +13 -0
- webscout/search/engines/yep/base.py +34 -0
- webscout/search/engines/yep/images.py +101 -0
- webscout/search/engines/yep/suggestions.py +38 -0
- webscout/search/engines/yep/text.py +99 -0
- webscout/search/http_client.py +172 -0
- webscout/search/results.py +141 -0
- webscout/search/yahoo_main.py +57 -0
- webscout/search/yep_main.py +48 -0
- webscout/server/__init__.py +48 -0
- webscout/server/config.py +78 -0
- webscout/server/exceptions.py +69 -0
- webscout/server/providers.py +286 -0
- webscout/server/request_models.py +131 -0
- webscout/server/request_processing.py +404 -0
- webscout/server/routes.py +642 -0
- webscout/server/server.py +351 -0
- webscout/server/ui_templates.py +1171 -0
- webscout/swiftcli/__init__.py +79 -95
- webscout/swiftcli/core/__init__.py +7 -7
- webscout/swiftcli/core/cli.py +574 -297
- webscout/swiftcli/core/context.py +98 -104
- webscout/swiftcli/core/group.py +268 -241
- webscout/swiftcli/decorators/__init__.py +28 -28
- webscout/swiftcli/decorators/command.py +243 -221
- webscout/swiftcli/decorators/options.py +247 -220
- webscout/swiftcli/decorators/output.py +392 -252
- webscout/swiftcli/exceptions.py +21 -21
- webscout/swiftcli/plugins/__init__.py +9 -9
- webscout/swiftcli/plugins/base.py +134 -135
- webscout/swiftcli/plugins/manager.py +269 -269
- webscout/swiftcli/utils/__init__.py +58 -59
- webscout/swiftcli/utils/formatting.py +251 -252
- webscout/swiftcli/utils/parsing.py +368 -267
- webscout/update_checker.py +280 -136
- webscout/utils.py +28 -14
- webscout/version.py +2 -1
- webscout/version.py.bak +3 -0
- webscout/zeroart/__init__.py +218 -135
- webscout/zeroart/base.py +70 -66
- webscout/zeroart/effects.py +155 -101
- webscout/zeroart/fonts.py +1799 -1239
- webscout-2026.1.19.dist-info/METADATA +638 -0
- webscout-2026.1.19.dist-info/RECORD +312 -0
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/entry_points.txt +1 -1
- webscout/DWEBS.py +0 -520
- webscout/Extra/Act.md +0 -309
- webscout/Extra/GitToolkit/gitapi/README.md +0 -110
- webscout/Extra/autocoder/__init__.py +0 -9
- webscout/Extra/autocoder/autocoder.py +0 -1105
- webscout/Extra/autocoder/autocoder_utiles.py +0 -332
- webscout/Extra/gguf.md +0 -430
- webscout/Extra/weather.md +0 -281
- webscout/Litlogger/README.md +0 -10
- webscout/Litlogger/__init__.py +0 -15
- webscout/Litlogger/formats.py +0 -4
- webscout/Litlogger/handlers.py +0 -103
- webscout/Litlogger/levels.py +0 -13
- webscout/Litlogger/logger.py +0 -92
- webscout/Provider/AI21.py +0 -177
- webscout/Provider/AISEARCH/DeepFind.py +0 -254
- webscout/Provider/AISEARCH/felo_search.py +0 -202
- webscout/Provider/AISEARCH/genspark_search.py +0 -324
- webscout/Provider/AISEARCH/hika_search.py +0 -186
- webscout/Provider/AISEARCH/scira_search.py +0 -298
- webscout/Provider/Aitopia.py +0 -316
- webscout/Provider/AllenAI.py +0 -440
- webscout/Provider/Blackboxai.py +0 -791
- webscout/Provider/ChatGPTClone.py +0 -237
- webscout/Provider/ChatGPTGratis.py +0 -194
- webscout/Provider/Cloudflare.py +0 -324
- webscout/Provider/ExaChat.py +0 -358
- webscout/Provider/Flowith.py +0 -217
- webscout/Provider/FreeGemini.py +0 -250
- webscout/Provider/Glider.py +0 -225
- webscout/Provider/HF_space/__init__.py +0 -0
- webscout/Provider/HF_space/qwen_qwen2.py +0 -206
- webscout/Provider/HuggingFaceChat.py +0 -469
- webscout/Provider/Hunyuan.py +0 -283
- webscout/Provider/LambdaChat.py +0 -411
- webscout/Provider/Llama3.py +0 -259
- webscout/Provider/Nemotron.py +0 -218
- webscout/Provider/OLLAMA.py +0 -396
- webscout/Provider/OPENAI/BLACKBOXAI.py +0 -766
- webscout/Provider/OPENAI/Cloudflare.py +0 -378
- webscout/Provider/OPENAI/FreeGemini.py +0 -283
- webscout/Provider/OPENAI/NEMOTRON.py +0 -232
- webscout/Provider/OPENAI/Qwen3.py +0 -283
- webscout/Provider/OPENAI/api.py +0 -969
- webscout/Provider/OPENAI/c4ai.py +0 -373
- webscout/Provider/OPENAI/chatgptclone.py +0 -494
- webscout/Provider/OPENAI/copilot.py +0 -242
- webscout/Provider/OPENAI/flowith.py +0 -162
- webscout/Provider/OPENAI/freeaichat.py +0 -359
- webscout/Provider/OPENAI/mcpcore.py +0 -389
- webscout/Provider/OPENAI/multichat.py +0 -376
- webscout/Provider/OPENAI/opkfc.py +0 -496
- webscout/Provider/OPENAI/scirachat.py +0 -477
- webscout/Provider/OPENAI/standardinput.py +0 -433
- webscout/Provider/OPENAI/typegpt.py +0 -364
- webscout/Provider/OPENAI/uncovrAI.py +0 -463
- webscout/Provider/OPENAI/venice.py +0 -431
- webscout/Provider/OPENAI/yep.py +0 -382
- webscout/Provider/OpenGPT.py +0 -209
- webscout/Provider/Perplexitylabs.py +0 -415
- webscout/Provider/Reka.py +0 -214
- webscout/Provider/StandardInput.py +0 -290
- webscout/Provider/TTI/aiarta.py +0 -365
- webscout/Provider/TTI/artbit.py +0 -0
- webscout/Provider/TTI/fastflux.py +0 -200
- webscout/Provider/TTI/piclumen.py +0 -203
- webscout/Provider/TTI/pixelmuse.py +0 -225
- webscout/Provider/TTS/gesserit.py +0 -128
- webscout/Provider/TTS/sthir.py +0 -94
- webscout/Provider/TeachAnything.py +0 -229
- webscout/Provider/UNFINISHED/puterjs.py +0 -635
- webscout/Provider/UNFINISHED/test_lmarena.py +0 -119
- webscout/Provider/Venice.py +0 -258
- webscout/Provider/VercelAI.py +0 -253
- webscout/Provider/Writecream.py +0 -246
- webscout/Provider/WritingMate.py +0 -269
- webscout/Provider/asksteve.py +0 -220
- webscout/Provider/chatglm.py +0 -215
- webscout/Provider/copilot.py +0 -425
- webscout/Provider/freeaichat.py +0 -285
- webscout/Provider/granite.py +0 -235
- webscout/Provider/hermes.py +0 -266
- webscout/Provider/koala.py +0 -170
- webscout/Provider/lmarena.py +0 -198
- webscout/Provider/multichat.py +0 -364
- webscout/Provider/scira_chat.py +0 -299
- webscout/Provider/scnet.py +0 -243
- webscout/Provider/talkai.py +0 -194
- webscout/Provider/typegpt.py +0 -289
- webscout/Provider/uncovr.py +0 -368
- webscout/Provider/yep.py +0 -389
- webscout/litagent/Readme.md +0 -276
- webscout/litprinter/__init__.py +0 -59
- webscout/swiftcli/Readme.md +0 -323
- webscout/tempid.py +0 -128
- webscout/webscout_search.py +0 -1184
- webscout/webscout_search_async.py +0 -654
- webscout/yep_search.py +0 -347
- webscout/zeroart/README.md +0 -89
- webscout-8.2.9.dist-info/METADATA +0 -1033
- webscout-8.2.9.dist-info/RECORD +0 -289
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/top_level.txt +0 -0

webscout/search/engines/duckduckgo/text.py

@@ -0,0 +1,221 @@
+"""DuckDuckGo text search."""
+
+from __future__ import annotations
+
+import warnings
+from random import shuffle
+from typing import Optional
+
+from ....exceptions import WebscoutE
+from ....search.results import TextResult
+from .base import DuckDuckGoBase
+
+
+class DuckDuckGoTextSearch(DuckDuckGoBase):
+    """DuckDuckGo text/web search."""
+
+    name = "duckduckgo"
+    category = "text"
+
+    def run(self, *args, **kwargs) -> list[TextResult]:
+        """Perform text search on DuckDuckGo.
+
+        Args:
+            keywords: Search query.
+            region: Region code (e.g., wt-wt, us-en).
+            safesearch: on, moderate, or off.
+            timelimit: d, w, m, or y.
+            backend: html, lite, or auto.
+            max_results: Maximum number of results.
+
+        Returns:
+            List of TextResult objects.
+        """
+        keywords = args[0] if args else kwargs.get("keywords")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "wt-wt")
+        args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+        timelimit = args[3] if len(args) > 3 else kwargs.get("timelimit")
+        backend = args[4] if len(args) > 4 else kwargs.get("backend", "auto")
+        max_results = args[5] if len(args) > 5 else kwargs.get("max_results")
+
+        if keywords is None:
+            raise ValueError("keywords cannot be None")
+
+        if backend in ("api", "ecosia"):
+            warnings.warn(f"{backend=} is deprecated, using backend='auto'", stacklevel=2)
+            backend = "auto"
+        backends = ["html", "lite"] if backend == "auto" else [backend]
+        shuffle(backends)
+
+        results, err = [], None
+        for b in backends:
+            try:
+                if b == "html":
+                    results = self._text_html(keywords, region, timelimit, max_results)
+                elif b == "lite":
+                    results = self._text_lite(keywords, region, timelimit, max_results)
+                return results
+            except Exception as ex:
+                err = ex
+
+        raise WebscoutE(err)
+
+    def _text_html(
+        self,
+        keywords: str,
+        region: str = "wt-wt",
+        timelimit: str | None = None,
+        max_results: int | None = None,
+    ) -> list[TextResult]:
+        """Text search using HTML backend."""
+        assert keywords, "keywords is mandatory"
+
+        payload = {
+            "q": keywords,
+            "s": "0",
+            "o": "json",
+            "api": "d.js",
+            "vqd": "",
+            "kl": region,
+            "bing_market": region,
+        }
+        if timelimit:
+            payload["df"] = timelimit
+        if max_results and max_results > 20:
+            vqd = self._get_vqd(keywords)
+            payload["vqd"] = vqd
+
+        cache = set()
+        results: list[TextResult] = []
+
+        def _text_html_page(s: int) -> list[TextResult]:
+            payload["s"] = f"{s}"
+            resp_content = self._get_url("POST", "https://html.duckduckgo.com/html", data=payload).content
+            if b"No results." in resp_content:
+                return []
+
+            page_results = []
+            tree = self.parser.fromstring(resp_content)
+            elements = tree.xpath("//div[h2]")
+            if not isinstance(elements, list):
+                return []
+            for e in elements:
+                if isinstance(e, self.parser.etree.Element):
+                    hrefxpath = e.xpath("./a/@href")
+                    href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
+                    if (
+                        href
+                        and href not in cache
+                        and not href.startswith(
+                            ("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain")
+                        )
+                    ):
+                        cache.add(href)
+                        titlexpath = e.xpath("./h2/a/text()")
+                        title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
+                        bodyxpath = e.xpath("./a//text()")
+                        body = "".join(str(x) for x in bodyxpath) if bodyxpath and isinstance(bodyxpath, list) else ""
+                        result = TextResult(
+                            title=self._normalize(title),
+                            href=self._normalize_url(href),
+                            body=self._normalize(body),
+                        )
+                        page_results.append(result)
+            return page_results
+
+        slist = [0]
+        if max_results:
+            max_results = min(max_results, 2023)
+            slist.extend(range(23, max_results, 50))
+        try:
+            for r in self._executor.map(_text_html_page, slist):
+                results.extend(r)
+        except Exception as e:
+            raise e
+
+        return list(self.islice(results, max_results))
+
+    def _text_lite(
+        self,
+        keywords: str,
+        region: str = "wt-wt",
+        timelimit: str | None = None,
+        max_results: int | None = None,
+    ) -> list[TextResult]:
+        """Text search using lite backend."""
+        assert keywords, "keywords is mandatory"
+
+        payload = {
+            "q": keywords,
+            "s": "0",
+            "o": "json",
+            "api": "d.js",
+            "vqd": "",
+            "kl": region,
+            "bing_market": region,
+        }
+        if timelimit:
+            payload["df"] = timelimit
+
+        cache = set()
+        results: list[TextResult] = []
+
+        def _text_lite_page(s: int) -> list[TextResult]:
+            payload["s"] = f"{s}"
+            resp_content = self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload).content
+            if b"No more results." in resp_content:
+                return []
+
+            page_results = []
+            tree = self.parser.fromstring(resp_content)
+            elements = tree.xpath("//table[last()]//tr")
+            if not isinstance(elements, list):
+                return []
+
+            href: Optional[str] = None
+            title: str = ""
+            data = zip(self.cycle(range(1, 5)), elements)
+            for i, e in data:
+                if isinstance(e, self.parser.etree.Element):
+                    if i == 1:
+                        hrefxpath = e.xpath(".//a//@href")
+                        href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, list) else None
+                        if (
+                            href is None
+                            or href in cache
+                            or href.startswith(
+                                ("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain")
+                            )
+                        ):
+                            [next(data, None) for _ in range(3)]  # skip block(i=1,2,3,4)
+                        else:
+                            cache.add(href)
+                            titlexpath = e.xpath(".//a//text()")
+                            title = str(titlexpath[0]) if titlexpath and isinstance(titlexpath, list) else ""
+                    elif i == 2:
+                        bodyxpath = e.xpath(".//td[@class='result-snippet']//text()")
+                        body = (
+                            "".join(str(x) for x in bodyxpath).strip()
+                            if bodyxpath and isinstance(bodyxpath, list)
+                            else ""
+                        )
+                        if href:
+                            result = TextResult(
+                                title=self._normalize(title),
+                                href=self._normalize_url(href),
+                                body=self._normalize(body),
+                            )
+                            page_results.append(result)
+            return page_results
+
+        slist = [0]
+        if max_results:
+            max_results = min(max_results, 2023)
+            slist.extend(range(23, max_results, 50))
+        try:
+            for r in self._executor.map(_text_lite_page, slist):
+                results.extend(r)
+        except Exception as e:
+            raise e
+
+        return list(self.islice(results, max_results))
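
The `run` method above takes its parameters positionally or as keywords and, with `backend="auto"`, shuffles the `html` and `lite` backends, falling back to the other if one raises. A minimal usage sketch, assuming the `DuckDuckGoBase` class (defined in `base.py`, which is not shown in this excerpt) can be constructed without arguments:

```python
# Hedged sketch: the module path comes from the file listing above; the
# base-class constructor signature is an assumption, not shown in this diff.
from webscout.search.engines.duckduckgo.text import DuckDuckGoTextSearch

engine = DuckDuckGoTextSearch()
results = engine.run(
    "open source web scraping",
    region="wt-wt",
    backend="auto",     # tries "html" and "lite" in random order
    max_results=25,
)
for r in results:       # each item is a TextResult(title, href, body)
    print(r.title, "->", r.href)
```
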

webscout/search/engines/duckduckgo/translate.py

@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+from .base import DuckDuckGoBase
+
+
+class DuckDuckGoTranslate(DuckDuckGoBase):
+    name = "duckduckgo"
+    category = "translate"
+    def run(self, *args, **kwargs) -> list[dict[str, str]]:
+        keywords = args[0] if args else kwargs.get("keywords")
+        from_ = args[1] if len(args) > 1 else kwargs.get("from_")
+        to = args[2] if len(args) > 2 else kwargs.get("to", "en")
+
+        assert keywords, "keywords is mandatory"
+
+        vqd = self._get_vqd("translate")
+
+        payload = {
+            "vqd": vqd,
+            "query": "translate",
+            "to": to,
+        }
+        if from_:
+            payload["from"] = from_
+
+        def _translate_keyword(keyword: str) -> dict[str, str]:
+            resp_content = self._get_url(
+                "POST",
+                "https://duckduckgo.com/translation.js",
+                params=payload,
+                content=keyword.encode(),
+            ).content
+            page_data: dict[str, str] = self.json_loads(resp_content)
+            page_data["original"] = keyword
+            return page_data
+
+        if isinstance(keywords, str):
+            keywords = [keywords]
+
+        results = []
+        try:
+            for r in self._executor.map(_translate_keyword, keywords):
+                results.append(r)
+        except Exception as e:
+            raise e
+
+        return results
+
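
`DuckDuckGoTranslate.run` accepts either a single string or a list of strings, posts each one to `duckduckgo.com/translation.js`, and returns one dictionary per keyword with the source text added under `"original"`. A hedged sketch under the same no-argument-constructor assumption; any other keys in each dictionary come from DuckDuckGo's response and are not defined in this diff:

```python
# Hedged sketch; response keys other than "original" depend on the
# translation.js endpoint and are not guaranteed by this code.
from webscout.search.engines.duckduckgo.translate import DuckDuckGoTranslate

rows = DuckDuckGoTranslate().run(["hola mundo", "bonjour"], to="en")
for row in rows:
    print(row["original"], "->", {k: v for k, v in row.items() if k != "original"})
```
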

webscout/search/engines/duckduckgo/videos.py

@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+from ....search.results import VideosResult
+from .base import DuckDuckGoBase
+
+
+class DuckDuckGoVideos(DuckDuckGoBase):
+    name = "duckduckgo"
+    category = "videos"
+    def run(self, *args, **kwargs) -> list[VideosResult]:
+        keywords = args[0] if args else kwargs.get("keywords")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "wt-wt")
+        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+        timelimit = args[3] if len(args) > 3 else kwargs.get("timelimit")
+        resolution = args[4] if len(args) > 4 else kwargs.get("resolution")
+        duration = args[5] if len(args) > 5 else kwargs.get("duration")
+        license_videos = args[6] if len(args) > 6 else kwargs.get("license_videos")
+        max_results = args[7] if len(args) > 7 else kwargs.get("max_results")
+
+        assert keywords, "keywords is mandatory"
+
+        vqd = self._get_vqd(keywords)
+
+        safesearch_base = {"on": "1", "moderate": "-1", "off": "-2"}
+        timelimit = f"publishedAfter:{timelimit}" if timelimit else ""
+        resolution = f"videoDefinition:{resolution}" if resolution else ""
+        duration = f"videoDuration:{duration}" if duration else ""
+        license_videos = f"videoLicense:{license_videos}" if license_videos else ""
+        payload = {
+            "l": region,
+            "o": "json",
+            "q": keywords,
+            "vqd": vqd,
+            "f": f"{timelimit},{resolution},{duration},{license_videos}",
+            "p": safesearch_base[safesearch.lower()],
+        }
+
+        cache = set()
+        results: list[VideosResult] = []
+
+        def _videos_page(s: int) -> list[VideosResult]:
+            payload["s"] = f"{s}"
+            resp_content = self._get_url("GET", "https://duckduckgo.com/v.js", params=payload).content
+            resp_json = self.json_loads(resp_content)
+
+            page_data = resp_json.get("results", [])
+            page_results = []
+            for row in page_data:
+                if row["content"] not in cache:
+                    cache.add(row["content"])
+                    result = VideosResult(
+                        content=row.get("content", ""),
+                        description=row.get("description", ""),
+                        duration=row.get("duration", ""),
+                        embed_html=row.get("embed_html", ""),
+                        embed_url=row.get("embed_url", ""),
+                        image_token=row.get("image_token", ""),
+                        images=row.get("images", {}),
+                        provider=row.get("provider", ""),
+                        published=row.get("published", ""),
+                        publisher=row.get("publisher", ""),
+                        statistics=row.get("statistics", {}),
+                        title=row.get("title", ""),
+                        uploader=row.get("uploader", ""),
+                    )
+                    page_results.append(result)
+            return page_results
+
+        slist = [0]
+        if max_results:
+            max_results = min(max_results, 400)
+            slist.extend(range(60, max_results, 60))
+        try:
+            for r in self._executor.map(_videos_page, slist):
+                results.extend(r)
+        except Exception as e:
+            raise e
+
+        return list(self.islice(results, max_results))
+
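
The video engine folds the optional `timelimit`, `resolution`, `duration`, and `license_videos` values into a single `f` filter string, pages through `v.js` in steps of 60, and caps results at 400. A hedged usage sketch; the filter values are passed through unchanged, so the accepted values are whatever the endpoint supports:

```python
# Hedged sketch; "content" is the field the deduplication cache keys on and
# appears to carry the video URL.
from webscout.search.engines.duckduckgo.videos import DuckDuckGoVideos

videos = DuckDuckGoVideos().run(
    "lock-free queue explained",
    safesearch="moderate",
    resolution="high",    # sent as "videoDefinition:high" inside the f parameter
    duration="short",     # sent as "videoDuration:short"
    max_results=30,
)
for v in videos:
    print(v.title, v.content)
```
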

webscout/search/engines/duckduckgo/weather.py

@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+import json
+from datetime import datetime
+from typing import Any, Dict, List, Optional, TypedDict
+from urllib.parse import quote
+
+from ....exceptions import WebscoutE
+from .base import DuckDuckGoBase
+
+
+class WeatherData(TypedDict):
+    location: str
+    current: Dict[str, Any]
+    daily_forecast: List[Dict[str, Any]]
+    hourly_forecast: List[Dict[str, Any]]
+
+
+class DuckDuckGoWeather(DuckDuckGoBase):
+    name = "duckduckgo"
+    category = "weather"
+    def run(self, *args, **kwargs) -> WeatherData:
+        location = args[0] if args else kwargs.get("location")
+        language = args[1] if len(args) > 1 else kwargs.get("language", "en")
+
+        assert location, "location is mandatory"
+        lang = language.split('-')[0]
+        url = f"https://duckduckgo.com/js/spice/forecast/{quote(location)}/{lang}"
+
+        resp = self._get_url("GET", url).content
+        resp_text = resp.decode('utf-8')
+
+        if "ddg_spice_forecast(" not in resp_text:
+            raise WebscoutE(f"No weather data found for {location}")
+
+        json_text = resp_text[resp_text.find('(') + 1:resp_text.rfind(')')]
+        try:
+            result = json.loads(json_text)
+        except Exception as e:
+            raise WebscoutE(f"Error parsing weather JSON: {e}")
+
+        if not result or 'currentWeather' not in result or 'forecastDaily' not in result:
+            raise WebscoutE(f"Invalid weather data format for {location}")
+
+        formatted_data: WeatherData = {
+            "location": result["currentWeather"]["metadata"].get("ddg-location", "Unknown"),
+            "current": {
+                "condition": result["currentWeather"].get("conditionCode"),
+                "temperature_c": result["currentWeather"].get("temperature"),
+                "feels_like_c": result["currentWeather"].get("temperatureApparent"),
+                "humidity": result["currentWeather"].get("humidity"),
+                "wind_speed_ms": result["currentWeather"].get("windSpeed"),
+                "wind_direction": result["currentWeather"].get("windDirection"),
+                "visibility_m": result["currentWeather"].get("visibility"),
+            },
+            "daily_forecast": [],
+            "hourly_forecast": []
+        }
+
+        for day in result["forecastDaily"]["days"]:
+            formatted_data["daily_forecast"].append({
+                "date": datetime.fromisoformat(day["forecastStart"].replace("Z", "+00:00")).strftime("%Y-%m-%d"),
+                "condition": day["daytimeForecast"].get("conditionCode"),
+                "max_temp_c": day["temperatureMax"],
+                "min_temp_c": day["temperatureMin"],
+                "sunrise": datetime.fromisoformat(day["sunrise"].replace("Z", "+00:00")).strftime("%H:%M"),
+                "sunset": datetime.fromisoformat(day["sunset"].replace("Z", "+00:00")).strftime("%H:%M"),
+            })
+
+        if 'forecastHourly' in result and 'hours' in result['forecastHourly']:
+            for hour in result['forecastHourly']['hours']:
+                formatted_data["hourly_forecast"].append({
+                    "time": datetime.fromisoformat(hour["forecastStart"].replace("Z", "+00:00")).strftime("%H:%M"),
+                    "condition": hour.get("conditionCode"),
+                    "temperature_c": hour.get("temperature"),
+                    "feels_like_c": hour.get("temperatureApparent"),
+                    "humidity": hour.get("humidity"),
+                    "wind_speed_ms": hour.get("windSpeed"),
+                    "wind_direction": hour.get("windDirection"),
+                    "visibility_m": hour.get("visibility"),
+                })
+
+        return formatted_data
+
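
The weather engine calls the `js/spice/forecast` endpoint, strips the `ddg_spice_forecast(...)` JSONP wrapper by slicing between the first `(` and the last `)`, and reshapes the payload into the `WeatherData` TypedDict defined at the top of the file. A hedged sketch using exactly those keys, again assuming a no-argument constructor:

```python
# Hedged sketch; the dictionary keys are the ones WeatherData defines above.
from webscout.search.engines.duckduckgo.weather import DuckDuckGoWeather

report = DuckDuckGoWeather().run("Berlin", language="en")
print(report["location"])
print(report["current"]["temperature_c"], report["current"]["condition"])
for day in report["daily_forecast"][:3]:
    print(day["date"], day["min_temp_c"], "to", day["max_temp_c"])
```
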

webscout/search/engines/mojeek.py

@@ -0,0 +1,61 @@
+"""Mojeek search engine implementation."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, Optional
+
+from ..base import BaseSearchEngine
+from ..results import TextResult
+
+
+class Mojeek(BaseSearchEngine[TextResult]):
+    """Mojeek search engine."""
+
+    name = "mojeek"
+    category = "text"
+    provider = "mojeek"
+
+    search_url = "https://www.mojeek.com/search"
+    search_method = "GET"
+
+    items_xpath = "//ul[contains(@class, 'results')]/li"
+    elements_xpath: Mapping[str, str] = {
+        "title": ".//h2//text()",
+        "href": ".//h2/a/@href",
+        "body": ".//p[@class='s']//text()",
+    }
+
+    def build_payload(
+        self, query: str, region: str, safesearch: str, timelimit: str | None, page: int = 1, **kwargs: Any
+    ) -> dict[str, Any]:
+        """Build a payload for the search request."""
+        safesearch_base = {"on": "1", "moderate": "0", "off": "0"}
+        payload = {"q": query, "safe": safesearch_base[safesearch.lower()]}
+        if page > 1:
+            payload["s"] = f"{(page - 1) * 10 + 1}"
+        return payload
+
+    def run(self, *args, **kwargs) -> list[TextResult]:
+        """Run text search on Mojeek.
+
+        Args:
+            keywords: Search query.
+            region: Region code.
+            safesearch: Safe search level.
+            max_results: Maximum number of results (ignored for now).
+
+        Returns:
+            List of TextResult objects.
+        """
+        keywords = args[0] if args else kwargs.get("keywords")
+        if keywords is None:
+            keywords = ""
+        region = args[1] if len(args) > 1 else kwargs.get("region", "us-en")
+        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+        max_results = args[3] if len(args) > 3 else kwargs.get("max_results")
+
+        results = self.search(query=keywords, region=region, safesearch=safesearch)
+        if results and max_results:
+            results = results[:max_results]
+        return results or []
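
`build_payload` maps the requested page onto Mojeek's `s` offset as `(page - 1) * 10 + 1`, so page 2 starts at result 11 and page 3 at 21, while `run` delegates to the generic `search` method and trims to `max_results`. A hedged sketch, assuming `BaseSearchEngine` needs no constructor arguments:

```python
# Hedged sketch; the constructor signature of BaseSearchEngine is not part
# of this excerpt.
from webscout.search.engines.mojeek import Mojeek

hits = Mojeek().run("rust async runtime", safesearch="off", max_results=10)
for h in hits:
    print(h.title, h.href)
```
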

webscout/search/engines/wikipedia.py

@@ -0,0 +1,77 @@
+"""Wikipedia text search engine."""
+
+from __future__ import annotations
+
+from typing import Any, Optional
+from urllib.parse import quote
+
+from ...utils import json_loads
+from ..base import BaseSearchEngine
+from ..results import TextResult
+
+
+class Wikipedia(BaseSearchEngine[TextResult]):
+    """Wikipedia text search engine."""
+
+    name = "wikipedia"
+    category = "text"
+    provider = "wikipedia"
+    priority = 2
+
+    search_url = "https://{lang}.wikipedia.org/w/api.php?action=opensearch&search={query}"
+    search_method = "GET"
+
+    def build_payload(
+        self, query: str, region: str, safesearch: str, timelimit: str | None, page: int = 1, **kwargs: Any
+    ) -> dict[str, Any]:
+        """Build a payload for the search request."""
+        _country, lang = region.lower().split("-")
+        encoded_query = quote(query)
+        self.search_url = (
+            f"https://{lang}.wikipedia.org/w/api.php?action=opensearch&profile=fuzzy&limit=1&search={encoded_query}"
+        )
+        payload: dict[str, Any] = {}
+        self.lang = lang  # used in extract_results
+        return payload
+
+    def extract_results(self, html_text: str) -> list[TextResult]:
+        """Extract search results from html text."""
+        json_data = json_loads(html_text)
+        if not json_data or len(json_data) < 4:
+            return []
+
+        results = []
+        titles, descriptions, urls = json_data[1], json_data[2], json_data[3]
+
+        for title, description, url in zip(titles, descriptions, urls):
+            result = TextResult()
+            result.title = title
+            result.body = description
+            result.href = url
+            results.append(result)
+
+        return results
+
+    def run(self, *args, **kwargs) -> list[TextResult]:
+        """Run text search on Wikipedia.
+
+        Args:
+            keywords: Search query.
+            region: Region code.
+            safesearch: Safe search level (ignored).
+            max_results: Maximum number of results.
+
+        Returns:
+            List of TextResult objects.
+        """
+        keywords = args[0] if args else kwargs.get("keywords")
+        if keywords is None:
+            keywords = ""
+        region = args[1] if len(args) > 1 else kwargs.get("region", "en-us")
+        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+        max_results = args[3] if len(args) > 3 else kwargs.get("max_results")
+
+        results = self.search(query=keywords, region=region, safesearch=safesearch)
+        if results and max_results:
+            results = results[:max_results]
+        return results or []
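
The Wikipedia engine rewrites `search_url` per query to point at the opensearch API, which answers with a four-element JSON array (`[query, titles, descriptions, urls]`) that `extract_results` zips into `TextResult` objects. Note that `build_payload` takes the language from the second half of the region code, so a region such as `us-en` selects `en.wikipedia.org`. A hedged usage sketch:

```python
# Hedged sketch; region "us-en" resolves to en.wikipedia.org per build_payload.
from webscout.search.engines.wikipedia import Wikipedia

pages = Wikipedia().run("gradient descent", region="us-en", max_results=1)
for p in pages:
    print(p.title, p.href)
```
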

webscout/search/engines/yahoo/__init__.py

@@ -0,0 +1,41 @@
+"""Yahoo search engines package.
+
+This package provides comprehensive Yahoo search functionality including:
+- Text search with multi-page pagination
+- Image search with advanced filters
+- Video search with quality and length filters
+- News search with time filtering
+- Search suggestions/autocomplete
+
+All engines support:
+- Human-like browsing through multiple pages
+- Rich metadata extraction
+- Filter support
+- Clean result formatting
+
+Example:
+    >>> from webscout.search.engines.yahoo import YahooText
+    >>>
+    >>> # Search with automatic pagination
+    >>> searcher = YahooText()
+    >>> results = searcher.search("python programming", max_results=50)
+    >>>
+    >>> for result in results:
+    ...     print(f"{result.title}: {result.url}")
+"""
+
+from .base import YahooSearchEngine
+from .images import YahooImages
+from .news import YahooNews
+from .suggestions import YahooSuggestions
+from .text import YahooText
+from .videos import YahooVideos
+
+__all__ = [
+    "YahooSearchEngine",
+    "YahooText",
+    "YahooImages",
+    "YahooVideos",
+    "YahooNews",
+    "YahooSuggestions",
+]

webscout/search/engines/yahoo/answers.py

@@ -0,0 +1,19 @@
+"""Yahoo answers search."""
+
+from __future__ import annotations
+
+from .base import YahooSearchEngine
+
+
+class YahooAnswers(YahooSearchEngine):
+    """Yahoo instant answers."""
+
+    def build_payload(self, *args, **kwargs) -> dict:
+        return {}
+
+    def run(self, *args, **kwargs) -> list[dict[str, str]]:
+        """Get instant answers from Yahoo.
+
+        Not supported.
+        """
+        raise NotImplementedError("Yahoo does not support instant answers")

webscout/search/engines/yahoo/base.py

@@ -0,0 +1,34 @@
+"""Base class for Yahoo search engines."""
+
+from __future__ import annotations
+
+from secrets import token_urlsafe
+from typing import Generic, Optional, TypeVar
+
+from ...base import BaseSearchEngine
+
+T = TypeVar("T")
+
+class YahooSearchEngine(BaseSearchEngine[T], Generic[T]):
+    """Base class for Yahoo search engines.
+
+    Yahoo search is powered by Bing but has its own interface.
+    All Yahoo searches use dynamic URLs with tokens for tracking.
+    """
+
+    provider = "yahoo"
+    _base_url = "https://search.yahoo.com"
+
+    def generate_ylt_token(self) -> str:
+        """Generate Yahoo _ylt tracking token."""
+        return token_urlsafe(24 * 3 // 4)
+
+    def generate_ylu_token(self) -> str:
+        """Generate Yahoo _ylu tracking token."""
+        return token_urlsafe(47 * 3 // 4)
+
+    def build_search_url(self, base_path: str) -> str:
+        """Build search URL with tracking tokens."""
+        ylt = self.generate_ylt_token()
+        ylu = self.generate_ylu_token()
+        return f"{self._base_url}/{base_path};_ylt={ylt};_ylu={ylu}"