webscout 8.2.9__py3-none-any.whl → 2026.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webscout/AIauto.py +524 -251
- webscout/AIbase.py +247 -319
- webscout/AIutel.py +68 -703
- webscout/Bard.py +1072 -1026
- webscout/Extra/GitToolkit/__init__.py +10 -10
- webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
- webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
- webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
- webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
- webscout/Extra/GitToolkit/gitapi/search.py +162 -0
- webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
- webscout/Extra/GitToolkit/gitapi/user.py +128 -96
- webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
- webscout/Extra/YTToolkit/README.md +443 -375
- webscout/Extra/YTToolkit/YTdownloader.py +953 -957
- webscout/Extra/YTToolkit/__init__.py +3 -3
- webscout/Extra/YTToolkit/transcriber.py +595 -476
- webscout/Extra/YTToolkit/ytapi/README.md +230 -44
- webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
- webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
- webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
- webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
- webscout/Extra/YTToolkit/ytapi/extras.py +178 -118
- webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
- webscout/Extra/YTToolkit/ytapi/https.py +89 -88
- webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
- webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
- webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
- webscout/Extra/YTToolkit/ytapi/query.py +143 -40
- webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
- webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
- webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
- webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
- webscout/Extra/YTToolkit/ytapi/video.py +403 -232
- webscout/Extra/__init__.py +2 -3
- webscout/Extra/gguf.py +1298 -684
- webscout/Extra/tempmail/README.md +487 -487
- webscout/Extra/tempmail/__init__.py +28 -28
- webscout/Extra/tempmail/async_utils.py +143 -141
- webscout/Extra/tempmail/base.py +172 -161
- webscout/Extra/tempmail/cli.py +191 -187
- webscout/Extra/tempmail/emailnator.py +88 -84
- webscout/Extra/tempmail/mail_tm.py +378 -361
- webscout/Extra/tempmail/temp_mail_io.py +304 -292
- webscout/Extra/weather.py +196 -194
- webscout/Extra/weather_ascii.py +17 -15
- webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
- webscout/Provider/AISEARCH/Perplexity.py +292 -333
- webscout/Provider/AISEARCH/README.md +106 -279
- webscout/Provider/AISEARCH/__init__.py +16 -9
- webscout/Provider/AISEARCH/brave_search.py +298 -0
- webscout/Provider/AISEARCH/iask_search.py +357 -410
- webscout/Provider/AISEARCH/monica_search.py +200 -220
- webscout/Provider/AISEARCH/webpilotai_search.py +242 -255
- webscout/Provider/Algion.py +413 -0
- webscout/Provider/Andi.py +74 -69
- webscout/Provider/Apriel.py +313 -0
- webscout/Provider/Ayle.py +323 -0
- webscout/Provider/ChatSandbox.py +329 -342
- webscout/Provider/ClaudeOnline.py +365 -0
- webscout/Provider/Cohere.py +232 -208
- webscout/Provider/DeepAI.py +367 -0
- webscout/Provider/Deepinfra.py +467 -340
- webscout/Provider/EssentialAI.py +217 -0
- webscout/Provider/ExaAI.py +274 -261
- webscout/Provider/Gemini.py +175 -169
- webscout/Provider/GithubChat.py +385 -369
- webscout/Provider/Gradient.py +286 -0
- webscout/Provider/Groq.py +556 -801
- webscout/Provider/HadadXYZ.py +323 -0
- webscout/Provider/HeckAI.py +392 -375
- webscout/Provider/HuggingFace.py +387 -0
- webscout/Provider/IBM.py +340 -0
- webscout/Provider/Jadve.py +317 -291
- webscout/Provider/K2Think.py +306 -0
- webscout/Provider/Koboldai.py +221 -384
- webscout/Provider/Netwrck.py +273 -270
- webscout/Provider/Nvidia.py +310 -0
- webscout/Provider/OPENAI/DeepAI.py +489 -0
- webscout/Provider/OPENAI/K2Think.py +423 -0
- webscout/Provider/OPENAI/PI.py +463 -0
- webscout/Provider/OPENAI/README.md +890 -952
- webscout/Provider/OPENAI/TogetherAI.py +405 -0
- webscout/Provider/OPENAI/TwoAI.py +255 -357
- webscout/Provider/OPENAI/__init__.py +148 -40
- webscout/Provider/OPENAI/ai4chat.py +348 -293
- webscout/Provider/OPENAI/akashgpt.py +436 -0
- webscout/Provider/OPENAI/algion.py +303 -0
- webscout/Provider/OPENAI/{exachat.py → ayle.py} +365 -444
- webscout/Provider/OPENAI/base.py +253 -249
- webscout/Provider/OPENAI/cerebras.py +296 -0
- webscout/Provider/OPENAI/chatgpt.py +870 -556
- webscout/Provider/OPENAI/chatsandbox.py +233 -173
- webscout/Provider/OPENAI/deepinfra.py +403 -322
- webscout/Provider/OPENAI/e2b.py +2370 -1414
- webscout/Provider/OPENAI/elmo.py +278 -0
- webscout/Provider/OPENAI/exaai.py +452 -417
- webscout/Provider/OPENAI/freeassist.py +446 -0
- webscout/Provider/OPENAI/gradient.py +448 -0
- webscout/Provider/OPENAI/groq.py +380 -364
- webscout/Provider/OPENAI/hadadxyz.py +292 -0
- webscout/Provider/OPENAI/heckai.py +333 -308
- webscout/Provider/OPENAI/huggingface.py +321 -0
- webscout/Provider/OPENAI/ibm.py +425 -0
- webscout/Provider/OPENAI/llmchat.py +253 -0
- webscout/Provider/OPENAI/llmchatco.py +378 -335
- webscout/Provider/OPENAI/meta.py +541 -0
- webscout/Provider/OPENAI/netwrck.py +374 -357
- webscout/Provider/OPENAI/nvidia.py +317 -0
- webscout/Provider/OPENAI/oivscode.py +348 -287
- webscout/Provider/OPENAI/openrouter.py +328 -0
- webscout/Provider/OPENAI/pydantic_imports.py +1 -172
- webscout/Provider/OPENAI/sambanova.py +397 -0
- webscout/Provider/OPENAI/sonus.py +305 -304
- webscout/Provider/OPENAI/textpollinations.py +370 -339
- webscout/Provider/OPENAI/toolbaz.py +375 -413
- webscout/Provider/OPENAI/typefully.py +419 -355
- webscout/Provider/OPENAI/typliai.py +279 -0
- webscout/Provider/OPENAI/utils.py +314 -318
- webscout/Provider/OPENAI/wisecat.py +359 -387
- webscout/Provider/OPENAI/writecream.py +185 -163
- webscout/Provider/OPENAI/x0gpt.py +462 -365
- webscout/Provider/OPENAI/zenmux.py +380 -0
- webscout/Provider/OpenRouter.py +386 -0
- webscout/Provider/Openai.py +337 -496
- webscout/Provider/PI.py +443 -429
- webscout/Provider/QwenLM.py +346 -254
- webscout/Provider/STT/__init__.py +28 -0
- webscout/Provider/STT/base.py +303 -0
- webscout/Provider/STT/elevenlabs.py +264 -0
- webscout/Provider/Sambanova.py +317 -0
- webscout/Provider/TTI/README.md +69 -82
- webscout/Provider/TTI/__init__.py +37 -7
- webscout/Provider/TTI/base.py +147 -64
- webscout/Provider/TTI/claudeonline.py +393 -0
- webscout/Provider/TTI/magicstudio.py +292 -201
- webscout/Provider/TTI/miragic.py +180 -0
- webscout/Provider/TTI/pollinations.py +331 -221
- webscout/Provider/TTI/together.py +334 -0
- webscout/Provider/TTI/utils.py +14 -11
- webscout/Provider/TTS/README.md +186 -192
- webscout/Provider/TTS/__init__.py +43 -10
- webscout/Provider/TTS/base.py +523 -159
- webscout/Provider/TTS/deepgram.py +286 -156
- webscout/Provider/TTS/elevenlabs.py +189 -111
- webscout/Provider/TTS/freetts.py +218 -0
- webscout/Provider/TTS/murfai.py +288 -113
- webscout/Provider/TTS/openai_fm.py +364 -129
- webscout/Provider/TTS/parler.py +203 -111
- webscout/Provider/TTS/qwen.py +334 -0
- webscout/Provider/TTS/sherpa.py +286 -0
- webscout/Provider/TTS/speechma.py +693 -580
- webscout/Provider/TTS/streamElements.py +275 -333
- webscout/Provider/TTS/utils.py +280 -280
- webscout/Provider/TextPollinationsAI.py +331 -308
- webscout/Provider/TogetherAI.py +450 -0
- webscout/Provider/TwoAI.py +309 -475
- webscout/Provider/TypliAI.py +311 -305
- webscout/Provider/UNFINISHED/ChatHub.py +219 -209
- webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +331 -326
- webscout/Provider/{GizAI.py → UNFINISHED/GizAI.py} +300 -295
- webscout/Provider/{Marcus.py → UNFINISHED/Marcus.py} +218 -198
- webscout/Provider/UNFINISHED/Qodo.py +481 -0
- webscout/Provider/{MCPCore.py → UNFINISHED/XenAI.py} +330 -315
- webscout/Provider/UNFINISHED/Youchat.py +347 -330
- webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
- webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
- webscout/Provider/UNFINISHED/liner.py +342 -0
- webscout/Provider/UNFINISHED/liner_api_request.py +246 -263
- webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +231 -224
- webscout/Provider/WiseCat.py +256 -233
- webscout/Provider/WrDoChat.py +390 -370
- webscout/Provider/__init__.py +115 -174
- webscout/Provider/ai4chat.py +181 -174
- webscout/Provider/akashgpt.py +330 -335
- webscout/Provider/cerebras.py +397 -290
- webscout/Provider/cleeai.py +236 -213
- webscout/Provider/elmo.py +291 -283
- webscout/Provider/geminiapi.py +343 -208
- webscout/Provider/julius.py +245 -223
- webscout/Provider/learnfastai.py +333 -325
- webscout/Provider/llama3mitril.py +230 -215
- webscout/Provider/llmchat.py +308 -258
- webscout/Provider/llmchatco.py +321 -306
- webscout/Provider/meta.py +996 -801
- webscout/Provider/oivscode.py +332 -309
- webscout/Provider/searchchat.py +316 -292
- webscout/Provider/sonus.py +264 -258
- webscout/Provider/toolbaz.py +359 -353
- webscout/Provider/turboseek.py +332 -266
- webscout/Provider/typefully.py +262 -202
- webscout/Provider/x0gpt.py +332 -299
- webscout/__init__.py +31 -39
- webscout/__main__.py +5 -5
- webscout/cli.py +585 -524
- webscout/client.py +1497 -70
- webscout/conversation.py +140 -436
- webscout/exceptions.py +383 -362
- webscout/litagent/__init__.py +29 -29
- webscout/litagent/agent.py +492 -455
- webscout/litagent/constants.py +60 -60
- webscout/models.py +505 -181
- webscout/optimizers.py +74 -420
- webscout/prompt_manager.py +376 -288
- webscout/sanitize.py +1514 -0
- webscout/scout/README.md +452 -404
- webscout/scout/__init__.py +8 -8
- webscout/scout/core/__init__.py +7 -7
- webscout/scout/core/crawler.py +330 -210
- webscout/scout/core/scout.py +800 -607
- webscout/scout/core/search_result.py +51 -96
- webscout/scout/core/text_analyzer.py +64 -63
- webscout/scout/core/text_utils.py +412 -277
- webscout/scout/core/web_analyzer.py +54 -52
- webscout/scout/element.py +872 -478
- webscout/scout/parsers/__init__.py +70 -69
- webscout/scout/parsers/html5lib_parser.py +182 -172
- webscout/scout/parsers/html_parser.py +238 -236
- webscout/scout/parsers/lxml_parser.py +203 -178
- webscout/scout/utils.py +38 -37
- webscout/search/__init__.py +47 -0
- webscout/search/base.py +201 -0
- webscout/search/bing_main.py +45 -0
- webscout/search/brave_main.py +92 -0
- webscout/search/duckduckgo_main.py +57 -0
- webscout/search/engines/__init__.py +127 -0
- webscout/search/engines/bing/__init__.py +15 -0
- webscout/search/engines/bing/base.py +35 -0
- webscout/search/engines/bing/images.py +114 -0
- webscout/search/engines/bing/news.py +96 -0
- webscout/search/engines/bing/suggestions.py +36 -0
- webscout/search/engines/bing/text.py +109 -0
- webscout/search/engines/brave/__init__.py +19 -0
- webscout/search/engines/brave/base.py +47 -0
- webscout/search/engines/brave/images.py +213 -0
- webscout/search/engines/brave/news.py +353 -0
- webscout/search/engines/brave/suggestions.py +318 -0
- webscout/search/engines/brave/text.py +167 -0
- webscout/search/engines/brave/videos.py +364 -0
- webscout/search/engines/duckduckgo/__init__.py +25 -0
- webscout/search/engines/duckduckgo/answers.py +80 -0
- webscout/search/engines/duckduckgo/base.py +189 -0
- webscout/search/engines/duckduckgo/images.py +100 -0
- webscout/search/engines/duckduckgo/maps.py +183 -0
- webscout/search/engines/duckduckgo/news.py +70 -0
- webscout/search/engines/duckduckgo/suggestions.py +22 -0
- webscout/search/engines/duckduckgo/text.py +221 -0
- webscout/search/engines/duckduckgo/translate.py +48 -0
- webscout/search/engines/duckduckgo/videos.py +80 -0
- webscout/search/engines/duckduckgo/weather.py +84 -0
- webscout/search/engines/mojeek.py +61 -0
- webscout/search/engines/wikipedia.py +77 -0
- webscout/search/engines/yahoo/__init__.py +41 -0
- webscout/search/engines/yahoo/answers.py +19 -0
- webscout/search/engines/yahoo/base.py +34 -0
- webscout/search/engines/yahoo/images.py +323 -0
- webscout/search/engines/yahoo/maps.py +19 -0
- webscout/search/engines/yahoo/news.py +258 -0
- webscout/search/engines/yahoo/suggestions.py +140 -0
- webscout/search/engines/yahoo/text.py +273 -0
- webscout/search/engines/yahoo/translate.py +19 -0
- webscout/search/engines/yahoo/videos.py +302 -0
- webscout/search/engines/yahoo/weather.py +220 -0
- webscout/search/engines/yandex.py +67 -0
- webscout/search/engines/yep/__init__.py +13 -0
- webscout/search/engines/yep/base.py +34 -0
- webscout/search/engines/yep/images.py +101 -0
- webscout/search/engines/yep/suggestions.py +38 -0
- webscout/search/engines/yep/text.py +99 -0
- webscout/search/http_client.py +172 -0
- webscout/search/results.py +141 -0
- webscout/search/yahoo_main.py +57 -0
- webscout/search/yep_main.py +48 -0
- webscout/server/__init__.py +48 -0
- webscout/server/config.py +78 -0
- webscout/server/exceptions.py +69 -0
- webscout/server/providers.py +286 -0
- webscout/server/request_models.py +131 -0
- webscout/server/request_processing.py +404 -0
- webscout/server/routes.py +642 -0
- webscout/server/server.py +351 -0
- webscout/server/ui_templates.py +1171 -0
- webscout/swiftcli/__init__.py +79 -95
- webscout/swiftcli/core/__init__.py +7 -7
- webscout/swiftcli/core/cli.py +574 -297
- webscout/swiftcli/core/context.py +98 -104
- webscout/swiftcli/core/group.py +268 -241
- webscout/swiftcli/decorators/__init__.py +28 -28
- webscout/swiftcli/decorators/command.py +243 -221
- webscout/swiftcli/decorators/options.py +247 -220
- webscout/swiftcli/decorators/output.py +392 -252
- webscout/swiftcli/exceptions.py +21 -21
- webscout/swiftcli/plugins/__init__.py +9 -9
- webscout/swiftcli/plugins/base.py +134 -135
- webscout/swiftcli/plugins/manager.py +269 -269
- webscout/swiftcli/utils/__init__.py +58 -59
- webscout/swiftcli/utils/formatting.py +251 -252
- webscout/swiftcli/utils/parsing.py +368 -267
- webscout/update_checker.py +280 -136
- webscout/utils.py +28 -14
- webscout/version.py +2 -1
- webscout/version.py.bak +3 -0
- webscout/zeroart/__init__.py +218 -135
- webscout/zeroart/base.py +70 -66
- webscout/zeroart/effects.py +155 -101
- webscout/zeroart/fonts.py +1799 -1239
- webscout-2026.1.19.dist-info/METADATA +638 -0
- webscout-2026.1.19.dist-info/RECORD +312 -0
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/entry_points.txt +1 -1
- webscout/DWEBS.py +0 -520
- webscout/Extra/Act.md +0 -309
- webscout/Extra/GitToolkit/gitapi/README.md +0 -110
- webscout/Extra/autocoder/__init__.py +0 -9
- webscout/Extra/autocoder/autocoder.py +0 -1105
- webscout/Extra/autocoder/autocoder_utiles.py +0 -332
- webscout/Extra/gguf.md +0 -430
- webscout/Extra/weather.md +0 -281
- webscout/Litlogger/README.md +0 -10
- webscout/Litlogger/__init__.py +0 -15
- webscout/Litlogger/formats.py +0 -4
- webscout/Litlogger/handlers.py +0 -103
- webscout/Litlogger/levels.py +0 -13
- webscout/Litlogger/logger.py +0 -92
- webscout/Provider/AI21.py +0 -177
- webscout/Provider/AISEARCH/DeepFind.py +0 -254
- webscout/Provider/AISEARCH/felo_search.py +0 -202
- webscout/Provider/AISEARCH/genspark_search.py +0 -324
- webscout/Provider/AISEARCH/hika_search.py +0 -186
- webscout/Provider/AISEARCH/scira_search.py +0 -298
- webscout/Provider/Aitopia.py +0 -316
- webscout/Provider/AllenAI.py +0 -440
- webscout/Provider/Blackboxai.py +0 -791
- webscout/Provider/ChatGPTClone.py +0 -237
- webscout/Provider/ChatGPTGratis.py +0 -194
- webscout/Provider/Cloudflare.py +0 -324
- webscout/Provider/ExaChat.py +0 -358
- webscout/Provider/Flowith.py +0 -217
- webscout/Provider/FreeGemini.py +0 -250
- webscout/Provider/Glider.py +0 -225
- webscout/Provider/HF_space/__init__.py +0 -0
- webscout/Provider/HF_space/qwen_qwen2.py +0 -206
- webscout/Provider/HuggingFaceChat.py +0 -469
- webscout/Provider/Hunyuan.py +0 -283
- webscout/Provider/LambdaChat.py +0 -411
- webscout/Provider/Llama3.py +0 -259
- webscout/Provider/Nemotron.py +0 -218
- webscout/Provider/OLLAMA.py +0 -396
- webscout/Provider/OPENAI/BLACKBOXAI.py +0 -766
- webscout/Provider/OPENAI/Cloudflare.py +0 -378
- webscout/Provider/OPENAI/FreeGemini.py +0 -283
- webscout/Provider/OPENAI/NEMOTRON.py +0 -232
- webscout/Provider/OPENAI/Qwen3.py +0 -283
- webscout/Provider/OPENAI/api.py +0 -969
- webscout/Provider/OPENAI/c4ai.py +0 -373
- webscout/Provider/OPENAI/chatgptclone.py +0 -494
- webscout/Provider/OPENAI/copilot.py +0 -242
- webscout/Provider/OPENAI/flowith.py +0 -162
- webscout/Provider/OPENAI/freeaichat.py +0 -359
- webscout/Provider/OPENAI/mcpcore.py +0 -389
- webscout/Provider/OPENAI/multichat.py +0 -376
- webscout/Provider/OPENAI/opkfc.py +0 -496
- webscout/Provider/OPENAI/scirachat.py +0 -477
- webscout/Provider/OPENAI/standardinput.py +0 -433
- webscout/Provider/OPENAI/typegpt.py +0 -364
- webscout/Provider/OPENAI/uncovrAI.py +0 -463
- webscout/Provider/OPENAI/venice.py +0 -431
- webscout/Provider/OPENAI/yep.py +0 -382
- webscout/Provider/OpenGPT.py +0 -209
- webscout/Provider/Perplexitylabs.py +0 -415
- webscout/Provider/Reka.py +0 -214
- webscout/Provider/StandardInput.py +0 -290
- webscout/Provider/TTI/aiarta.py +0 -365
- webscout/Provider/TTI/artbit.py +0 -0
- webscout/Provider/TTI/fastflux.py +0 -200
- webscout/Provider/TTI/piclumen.py +0 -203
- webscout/Provider/TTI/pixelmuse.py +0 -225
- webscout/Provider/TTS/gesserit.py +0 -128
- webscout/Provider/TTS/sthir.py +0 -94
- webscout/Provider/TeachAnything.py +0 -229
- webscout/Provider/UNFINISHED/puterjs.py +0 -635
- webscout/Provider/UNFINISHED/test_lmarena.py +0 -119
- webscout/Provider/Venice.py +0 -258
- webscout/Provider/VercelAI.py +0 -253
- webscout/Provider/Writecream.py +0 -246
- webscout/Provider/WritingMate.py +0 -269
- webscout/Provider/asksteve.py +0 -220
- webscout/Provider/chatglm.py +0 -215
- webscout/Provider/copilot.py +0 -425
- webscout/Provider/freeaichat.py +0 -285
- webscout/Provider/granite.py +0 -235
- webscout/Provider/hermes.py +0 -266
- webscout/Provider/koala.py +0 -170
- webscout/Provider/lmarena.py +0 -198
- webscout/Provider/multichat.py +0 -364
- webscout/Provider/scira_chat.py +0 -299
- webscout/Provider/scnet.py +0 -243
- webscout/Provider/talkai.py +0 -194
- webscout/Provider/typegpt.py +0 -289
- webscout/Provider/uncovr.py +0 -368
- webscout/Provider/yep.py +0 -389
- webscout/litagent/Readme.md +0 -276
- webscout/litprinter/__init__.py +0 -59
- webscout/swiftcli/Readme.md +0 -323
- webscout/tempid.py +0 -128
- webscout/webscout_search.py +0 -1184
- webscout/webscout_search_async.py +0 -654
- webscout/yep_search.py +0 -347
- webscout/zeroart/README.md +0 -89
- webscout-8.2.9.dist-info/METADATA +0 -1033
- webscout-8.2.9.dist-info/RECORD +0 -289
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""Base class for DuckDuckGo search implementations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
7
|
+
from functools import cached_property
|
|
8
|
+
from itertools import cycle, islice
|
|
9
|
+
from random import choice
|
|
10
|
+
from time import sleep, time
|
|
11
|
+
from typing import Any, Optional, cast
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
import trio # type: ignore
|
|
15
|
+
except ImportError:
|
|
16
|
+
pass
|
|
17
|
+
|
|
18
|
+
import curl_cffi.requests
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
from lxml.html import HTMLParser as LHTMLParser
|
|
22
|
+
from lxml.html import document_fromstring
|
|
23
|
+
LXML_AVAILABLE = True
|
|
24
|
+
except ImportError:
|
|
25
|
+
LXML_AVAILABLE = False
|
|
26
|
+
LHTMLParser = None # type: ignore
|
|
27
|
+
document_fromstring = None # type: ignore
|
|
28
|
+
|
|
29
|
+
from ....exceptions import RatelimitE, TimeoutE, WebscoutE
|
|
30
|
+
from ....litagent import LitAgent
|
|
31
|
+
from ....utils import (
|
|
32
|
+
_extract_vqd,
|
|
33
|
+
_normalize,
|
|
34
|
+
_normalize_url,
|
|
35
|
+
json_loads,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class DuckDuckGoBase:
    """Base class for DuckDuckGo search operations.

    Holds the impersonating ``curl_cffi`` session, a simple inter-request
    throttle, and thin wrappers around the shared utility helpers that the
    concrete engine classes call through ``self``.
    """

    # One thread pool shared by every instance; created when the class body runs.
    _executor: ThreadPoolExecutor = ThreadPoolExecutor()
    # Browser identities passed to curl_cffi's ``impersonate``; one is picked
    # at random for each new instance.
    _impersonates = (
        "chrome99", "chrome100", "chrome101", "chrome104", "chrome107", "chrome110",
        "chrome116", "chrome119", "chrome120", "chrome123", "chrome124", "chrome131", "chrome133a",
        "chrome99_android", "chrome131_android",
        "safari15_3", "safari15_5", "safari17_0", "safari17_2_ios", "safari18_0", "safari18_0_ios",
        "edge99", "edge101",
        "firefox133", "firefox135",
    )

    @staticmethod
    def _merge_headers(headers: dict[str, str] | None, defaults: dict[str, str]) -> dict[str, str]:
        """Return a new dict with ``defaults`` layered over a copy of ``headers``.

        The caller's ``headers`` mapping is never modified.
        """
        # NOTE(review): defaults are applied last, so they replace a
        # caller-supplied value for the same key -- confirm this precedence
        # is intended.
        merged: dict[str, str] = dict(headers) if headers else {}
        merged.update(defaults)
        return merged

    def __init__(
        self,
        headers: dict[str, str] | None = None,
        proxy: str | None = None,
        proxies: dict[str, str] | str | None = None,
        timeout: int | None = 10,
        verify: bool = True,
    ) -> None:
        """Initialize DuckDuckGo base client.

        Args:
            headers: Dictionary of headers for the HTTP client. It is copied,
                not modified in place.
            proxy: Proxy for the HTTP client (http/https/socks5). The
                ``DDGS_PROXY`` environment variable, when set, is used instead.
            proxies: Deprecated, use proxy instead. Only consulted when
                ``proxy`` is empty.
            timeout: Timeout value for the HTTP client.
            verify: SSL verification when making requests.
        """
        ddgs_proxy: str | None = os.environ.get("DDGS_PROXY")
        self.proxy: str | None = ddgs_proxy if ddgs_proxy else proxy

        if not proxy and proxies:
            # Legacy ``proxies`` may be a mapping or a bare proxy string.
            self.proxy = (proxies.get("http") or proxies.get("https")) if isinstance(proxies, dict) else proxies

        default_headers = {
            **LitAgent().generate_fingerprint(),
            "Origin": "https://duckduckgo.com",
            "Referer": "https://duckduckgo.com/",
        }

        # Work on a copy so the dict passed in by the caller is left untouched.
        self.headers = self._merge_headers(headers, default_headers)

        impersonate_browser = choice(self._impersonates)
        self.client = curl_cffi.requests.Session(
            headers=self.headers,
            proxies={'http': self.proxy, 'https': self.proxy} if self.proxy else None,
            timeout=timeout,
            impersonate=impersonate_browser,
            verify=verify,
        )
        self.timeout = timeout
        self.sleep_timestamp = 0.0

        # Utility methods exposed on the instance for subclasses.
        self.cycle = cycle
        self.islice = islice

    @cached_property
    def parser(self) -> Any:
        """Build (once per instance) a small wrapper around lxml's HTML parser.

        Raises:
            ImportError: If lxml could not be imported.
        """
        if not LXML_AVAILABLE:
            raise ImportError("lxml is required for HTML parsing")

        class Parser:
            def __init__(self):
                self.lhtml_parser = LHTMLParser(
                    remove_blank_text=True,
                    remove_comments=True,
                    remove_pis=True,
                    collect_ids=False
                ) if LHTMLParser else None
                self.etree = __import__('lxml.etree', fromlist=['Element'])

            def fromstring(self, html: bytes | str) -> Any:
                """Parse ``html`` into a document tree, or return None if lxml is unusable."""
                return document_fromstring(html, parser=self.lhtml_parser) if document_fromstring and self.lhtml_parser else None

        return Parser()

    def _sleep(self, sleeptime: float = 0.75) -> None:
        """Sleep between API requests.

        No delay is applied on the first call or when at least 20 seconds
        have passed since the previous call; otherwise sleeps ``sleeptime``.
        """
        now = time()
        if self.sleep_timestamp and now - self.sleep_timestamp < 20:
            delay = sleeptime
        else:
            delay = 0.0
        self.sleep_timestamp = now
        sleep(delay)

    def _get_url(
        self,
        method: str,
        url: str,
        params: dict[str, str] | None = None,
        content: bytes | None = None,
        data: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
        cookies: dict[str, str] | None = None,
        json: Any = None,
        timeout: float | None = None,
    ) -> Any:
        """Make HTTP request.

        Args:
            method: HTTP verb; "GET" and "POST" use the dedicated session
                methods, anything else goes through ``Session.request``.
            url: Target URL.
            params: Query-string parameters.
            content: Raw request body. For GET this is the only body source.
            data: Form body; for non-GET requests it takes priority over ``content``.
            headers: Per-request headers.
            cookies: Per-request cookies (only forwarded when a dict).
            json: JSON body.
            timeout: Per-request timeout; falls back to the instance timeout.

        Returns:
            The response object when the status code is 200.

        Raises:
            TimeoutE: The request failed and the error text contains "time".
            RatelimitE: Status code is one of 202, 301, 403, 400, 429, 418.
            WebscoutE: Any other request failure or unexpected status code.
        """
        self._sleep()

        request_kwargs: dict[str, Any] = {
            "params": params,
            "headers": headers,
            "json": json,
            "timeout": timeout or self.timeout,
        }
        if isinstance(cookies, dict):
            request_kwargs["cookies"] = cookies

        # GET only ever sends ``content``; other verbs prefer ``data``.
        body = content if method == "GET" else (data or content)
        if body:
            request_kwargs["data"] = body

        try:
            if method == "GET":
                resp = self.client.get(url, **request_kwargs)
            elif method == "POST":
                resp = self.client.post(url, **request_kwargs)
            else:
                resp = self.client.request(cast(Any, method), url, **request_kwargs)
        except Exception as ex:
            # Timeouts are recognised by message text, not exception type.
            if "time" in str(ex).lower():
                raise TimeoutE(f"{url} {type(ex).__name__}: {ex}") from ex
            raise WebscoutE(f"{url} {type(ex).__name__}: {ex}") from ex

        if resp.status_code == 200:
            return resp
        elif resp.status_code in (202, 301, 403, 400, 429, 418):
            raise RatelimitE(f"{resp.url} {resp.status_code} Ratelimit")
        raise WebscoutE(f"{resp.url} return None. {params=} {content=} {data=}")

    def _get_vqd(self, keywords: str) -> str:
        """Get vqd value for a search query."""
        resp_content = self._get_url("GET", "https://duckduckgo.com", params={"q": keywords}).content
        return _extract_vqd(resp_content, keywords)

    def json_loads(self, obj: str | bytes) -> Any:
        """Load JSON from string or bytes."""
        return json_loads(obj)

    def _normalize(self, text: str) -> str:
        """Normalize text."""
        return _normalize(text)

    def _normalize_url(self, url: str) -> str:
        """Normalize URL."""
        return _normalize_url(url)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""DuckDuckGo image search."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from ....search.results import ImagesResult
|
|
6
|
+
from .base import DuckDuckGoBase
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DuckDuckGoImages(DuckDuckGoBase):
    """DuckDuckGo image search."""

    name = "duckduckgo"
    category = "images"

    def run(self, *args, **kwargs) -> list[ImagesResult]:
        """Perform image search on DuckDuckGo.

        Arguments may be given positionally (in the order listed) or by keyword.

        Args:
            keywords: Search query.
            region: Region code.
            safesearch: on, moderate, or off.
            timelimit: d, w, m, or y.
            size: Small, Medium, Large, Wallpaper.
            color: color name or Monochrome.
            type_image: photo, clipart, gif, transparent, line.
            layout: Square, Tall, Wide.
            license_image: any, Public, Share, etc.
            max_results: Maximum number of results (capped at 500).

        Returns:
            List of ImagesResult objects.

        Raises:
            AssertionError: If ``keywords`` is empty.
            KeyError: If ``safesearch`` is not one of on/moderate/off.
        """
        keywords = args[0] if args else kwargs.get("keywords")
        region = args[1] if len(args) > 1 else kwargs.get("region", "wt-wt")
        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
        timelimit = args[3] if len(args) > 3 else kwargs.get("timelimit")
        size = args[4] if len(args) > 4 else kwargs.get("size")
        color = args[5] if len(args) > 5 else kwargs.get("color")
        type_image = args[6] if len(args) > 6 else kwargs.get("type_image")
        layout = args[7] if len(args) > 7 else kwargs.get("layout")
        license_image = args[8] if len(args) > 8 else kwargs.get("license_image")
        max_results = args[9] if len(args) > 9 else kwargs.get("max_results")

        # NOTE: kept as an assert so callers still see AssertionError; be
        # aware it is stripped when Python runs with -O.
        assert keywords, "keywords is mandatory"

        vqd = self._get_vqd(keywords)

        safesearch_base = {"on": "1", "moderate": "1", "off": "-1"}
        timelimit = f"time:{timelimit}" if timelimit else ""
        size = f"size:{size}" if size else ""
        color = f"color:{color}" if color else ""
        type_image = f"type:{type_image}" if type_image else ""
        layout = f"layout:{layout}" if layout else ""
        license_image = f"license:{license_image}" if license_image else ""
        payload = {
            "l": region,
            "o": "json",
            "q": keywords,
            "vqd": vqd,
            "f": f"{timelimit},{size},{color},{type_image},{layout},{license_image}",
            "p": safesearch_base[safesearch.lower()],
        }

        def _images_page(s: int) -> list[dict]:
            """Fetch the raw result rows for the page starting at offset ``s``."""
            # Pages run concurrently on the shared executor, so each request
            # gets its own params dict instead of writing the offset into the
            # shared ``payload``.
            page_params = {**payload, "s": f"{s}"}
            resp_content = self._get_url("GET", "https://duckduckgo.com/i.js", params=page_params).content
            resp_json = self.json_loads(resp_content)
            return resp_json.get("results", [])

        slist = [0]
        if max_results:
            max_results = min(max_results, 500)
            slist.extend(range(100, max_results, 100))

        # De-duplicate on the calling thread, in page order, so the worker
        # threads share no mutable state and the output is deterministic.
        seen: set = set()
        results: list[ImagesResult] = []
        for page_rows in self._executor.map(_images_page, slist):
            for row in page_rows:
                image_url = row.get("image")
                if not image_url or image_url in seen:
                    continue
                seen.add(image_url)
                results.append(
                    ImagesResult(
                        title=row["title"],
                        image=self._normalize_url(image_url),
                        thumbnail=self._normalize_url(row["thumbnail"]),
                        url=self._normalize_url(row["url"]),
                        height=row["height"],
                        width=row["width"],
                        source=row["source"],
                    )
                )

        return list(self.islice(results, max_results))
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from decimal import Decimal
|
|
4
|
+
from math import sqrt
|
|
5
|
+
|
|
6
|
+
from ....exceptions import WebscoutE
|
|
7
|
+
from .base import DuckDuckGoBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DuckDuckGoMaps(DuckDuckGoBase):
    """DuckDuckGo places ("maps") search.

    A search area is resolved to a bounding box (either around explicit
    coordinates or via the Nominatim geocoder), and DuckDuckGo's ``local.js``
    endpoint is queried for that box. When more results are wanted the box is
    split into quadrants and each quadrant is queried in turn.
    """

    name = "duckduckgo"
    category = "maps"

    def _calculate_distance(self, lat_t: Decimal, lon_l: Decimal, lat_b: Decimal, lon_r: Decimal) -> float:
        """Return the Euclidean distance between the corners of a bounding box.

        Args:
            lat_t: Latitude of the top edge.
            lon_l: Longitude of the left edge.
            lat_b: Latitude of the bottom edge.
            lon_r: Longitude of the right edge.

        Returns:
            The straight-line distance between the top-left and bottom-right
            corners, expressed in the same units as the inputs.
        """
        # ``sqrt`` works on floats, so the Decimals are converted first.
        lat_span = float(lat_t) - float(lat_b)
        lon_span = float(lon_r) - float(lon_l)
        return sqrt(lat_span**2 + lon_span**2)

    def run(self, *args, **kwargs) -> list[dict[str, str]]:
        """Search DuckDuckGo for places matching ``keywords``.

        Arguments may be given positionally (in the order listed) or by name.

        Args:
            keywords: Search query. Mandatory.
            place: Free-form place description; when given it is the only
                location field sent to the geocoder.
            street, city, county, state, country, postalcode: Structured
                location fields, used only when ``place`` is empty.
            latitude, longitude: Centre of the search area. When both are
                given the geocoder request is skipped. A decimal comma is
                accepted in place of a decimal point.
            radius: Amount by which the search box is expanded on every side
                (default 0; forced to 1 when explicit coordinates are used).
            max_results: Maximum number of results. When falsy only the
                initial bounding box is queried.

        Returns:
            A list of dicts with the keys ``title``, ``address``,
            ``country_code``, ``url``, ``phone``, ``latitude``, ``longitude``,
            ``source``, ``image``, ``desc``, ``hours``, ``category``,
            ``facebook``, ``instagram`` and ``twitter``.

        Raises:
            AssertionError: If ``keywords`` is empty.
            WebscoutE: If the geocoder returns no match for the location.
        """

        def _arg(index: int, key: str, default=None):
            # A positional value wins over the keyword of the same name.
            return args[index] if len(args) > index else kwargs.get(key, default)

        keywords = _arg(0, "keywords")
        place = _arg(1, "place")
        street = _arg(2, "street")
        city = _arg(3, "city")
        county = _arg(4, "county")
        state = _arg(5, "state")
        country = _arg(6, "country")
        postalcode = _arg(7, "postalcode")
        latitude = _arg(8, "latitude")
        longitude = _arg(9, "longitude")
        # An explicit ``None`` radius is treated like the default of 0.
        radius = _arg(10, "radius", 0) or 0
        max_results = _arg(11, "max_results")

        assert keywords, "keywords is mandatory"

        vqd = self._get_vqd(keywords)

        # Compare against None/"" rather than truthiness so that a numeric 0
        # (equator / prime meridian) still counts as a supplied coordinate.
        has_coordinates = latitude not in (None, "") and longitude not in (None, "")

        if has_coordinates:
            # Explicit coordinates: the box starts as a single point, so the
            # geocoder request is skipped. ``str()`` lets callers pass numbers
            # as well as strings.
            lat_t = lat_b = Decimal(str(latitude).replace(",", "."))
            lon_l = lon_r = Decimal(str(longitude).replace(",", "."))
            if radius == 0:
                radius = 1
        else:
            # Otherwise ask Nominatim for the bounding box of the location.
            if place:
                params = {
                    "q": place,
                    "polygon_geojson": "0",
                    "format": "jsonv2",
                }
            else:
                params = {
                    "polygon_geojson": "0",
                    "format": "jsonv2",
                }
                structured_fields = (
                    ("street", street),
                    ("city", city),
                    ("county", county),
                    ("state", state),
                    ("country", country),
                    ("postalcode", postalcode),
                )
                for field, value in structured_fields:
                    if value:
                        params[field] = value
            resp_content = self._get_url(
                "GET",
                "https://nominatim.openstreetmap.org/search.php",
                params=params,
            ).content
            if resp_content == b"[]":
                raise WebscoutE("maps() Coordinates are not found, check function parameters.")
            resp_json = self.json_loads(resp_content)
            if not resp_json:
                # Same failure as above, for an empty payload that is not
                # byte-for-byte ``[]``.
                raise WebscoutE("maps() Coordinates are not found, check function parameters.")
            coordinates = resp_json[0]["boundingbox"]
            lat_t, lon_l = Decimal(coordinates[1]), Decimal(coordinates[2])
            lat_b, lon_r = Decimal(coordinates[0]), Decimal(coordinates[3])

        # Expand the search square by ``radius`` steps on every side. The step
        # is built from a string so the Decimal is exact rather than carrying
        # the binary float error of ``Decimal(0.008983)``.
        # NOTE(review): 0.008983 is presumably ~1 km in degrees - confirm.
        margin = Decimal(radius) * Decimal("0.008983")
        lat_t += margin
        lat_b -= margin
        lon_l -= margin
        lon_r += margin

        cache: set[str] = set()
        results: list[dict[str, str]] = []

        def _maps_page(
            bbox: tuple[Decimal, Decimal, Decimal, Decimal],
        ) -> list[dict[str, str]] | None:
            """Fetch one bounding box; return its new (not yet seen) places."""
            if max_results and len(results) >= max_results:
                return None
            top, left, bottom, right = bbox
            params = {
                "q": keywords,
                "vqd": vqd,
                "tg": "maps_places",
                "rt": "D",
                "mkexp": "b",
                "wiki_info": "1",
                "is_requery": "1",
                "bbox_tl": f"{top},{left}",
                "bbox_br": f"{bottom},{right}",
                "strict_bbox": "1",
            }
            resp_content = self._get_url("GET", "https://duckduckgo.com/local.js", params=params).content
            resp_json = self.json_loads(resp_content)
            page_data = resp_json.get("results", [])

            page_results = []
            for res in page_data:
                # Name + address identifies a place across overlapping boxes.
                r_name = f'{res["name"]} {res["address"]}'
                if r_name in cache:
                    continue
                cache.add(r_name)
                embed = res["embed"] or {}
                facebook_id = res["facebook_id"]
                instagram_id = res["instagram_id"]
                twitter_id = res["twitter_id"]
                page_results.append(
                    {
                        "title": res["name"],
                        "address": res["address"],
                        "country_code": res["country_code"],
                        "url": self._normalize_url(res["website"]),
                        "phone": res["phone"] or "",
                        "latitude": res["coordinates"]["latitude"],
                        "longitude": res["coordinates"]["longitude"],
                        "source": self._normalize_url(res["url"]),
                        "image": embed.get("image", ""),
                        "desc": embed.get("description", ""),
                        "hours": res["hours"] or "",
                        "category": res["ddg_category"] or "",
                        "facebook": f"www.facebook.com/profile.php?id={facebook_id}" if facebook_id else "",
                        "instagram": f"https://www.instagram.com/{instagram_id}" if instagram_id else "",
                        "twitter": f"https://twitter.com/{twitter_id}" if twitter_id else "",
                    }
                )
            return page_results

        work_bboxes = [(lat_t, lon_l, lat_b, lon_r)]
        while work_bboxes:
            queue_bboxes = []
            for bbox in work_bboxes:
                # Unpack first so the size test measures THIS box; testing
                # before unpacking would measure the previously handled box.
                top, left, bottom, right = bbox
                if self._calculate_distance(top, left, bottom, right) > 1:
                    # Large box: queue its four quadrants for the next round.
                    lat_middle = (top + bottom) / 2
                    lon_middle = (left + right) / 2
                    queue_bboxes.extend(
                        [
                            (top, left, lat_middle, lon_middle),
                            (top, lon_middle, lat_middle, right),
                            (lat_middle, left, bottom, lon_middle),
                            (lat_middle, lon_middle, bottom, right),
                        ]
                    )

            # Query every box of the current round; exceptions propagate.
            work_bboxes_results = []
            for page in self._executor.map(_maps_page, work_bboxes):
                if page:
                    work_bboxes_results.extend(page)

            results.extend(work_bboxes_results)

            work_bboxes = queue_bboxes
            # Stop when no limit was requested (single pass), the limit has
            # been reached, or the round produced nothing new.
            if not max_results or len(results) >= max_results or len(work_bboxes_results) == 0:
                break

        return list(self.islice(results, max_results))
|
|
183
|
+
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
|
|
5
|
+
from ....search.results import NewsResult
|
|
6
|
+
from .base import DuckDuckGoBase
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DuckDuckGoNews(DuckDuckGoBase):
    """DuckDuckGo news search backed by the ``news.js`` endpoint."""

    name = "duckduckgo"
    category = "news"

    def run(self, *args, **kwargs) -> list[NewsResult]:
        """Search DuckDuckGo news for ``keywords``.

        Arguments may be given positionally (in the order listed) or by name.

        Args:
            keywords: Search query. Mandatory.
            region: Region code sent as the ``l`` parameter
                (default ``"wt-wt"``).
            safesearch: One of ``"on"``, ``"moderate"`` or ``"off"``,
                case-insensitive (default ``"moderate"``).
            timelimit: Optional value sent as the ``df`` parameter.
            max_results: Maximum number of results, capped at 120. When falsy
                only the first page is requested.

        Returns:
            A list of ``NewsResult`` objects, de-duplicated by URL.

        Raises:
            AssertionError: If ``keywords`` is empty.
            KeyError: If ``safesearch`` is not one of the accepted values.
        """
        keywords = args[0] if args else kwargs.get("keywords")
        region = args[1] if len(args) > 1 else kwargs.get("region", "wt-wt")
        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
        timelimit = args[3] if len(args) > 3 else kwargs.get("timelimit")
        max_results = args[4] if len(args) > 4 else kwargs.get("max_results")

        assert keywords, "keywords is mandatory"

        vqd = self._get_vqd(keywords)

        safesearch_base = {"on": "1", "moderate": "-1", "off": "-2"}
        payload = {
            "l": region,
            "o": "json",
            "noamp": "1",
            "q": keywords,
            "vqd": vqd,
            "p": safesearch_base[safesearch.lower()],
        }
        if timelimit:
            payload["df"] = timelimit

        cache: set[str] = set()
        results: list[NewsResult] = []

        def _news_page(s: int) -> list[NewsResult]:
            """Fetch the page starting at offset ``s``; return its new results."""
            # Build a per-call copy instead of writing ``s`` into the shared
            # ``payload``: the pages are dispatched through ``self._executor``
            # (presumably a thread pool - confirm), and mutating one dict from
            # several calls could send a request with another call's offset.
            params = {**payload, "s": f"{s}"}
            resp_content = self._get_url("GET", "https://duckduckgo.com/news.js", params=params).content
            resp_json = self.json_loads(resp_content)
            page_data = resp_json.get("results", [])
            page_results = []
            for row in page_data:
                if row["url"] in cache:
                    continue
                cache.add(row["url"])
                image_url = row.get("image", None)
                page_results.append(
                    NewsResult(
                        # ``date`` is rendered as an ISO-8601 string in UTC.
                        date=datetime.fromtimestamp(row["date"], timezone.utc).isoformat(),
                        title=row["title"],
                        body=self._normalize(row["excerpt"]),
                        url=self._normalize_url(row["url"]),
                        image=self._normalize_url(image_url),
                        source=row["source"],
                    )
                )
            return page_results

        # The first page is always requested; further pages follow in steps
        # of 30 up to the capped ``max_results``.
        slist = [0]
        if max_results:
            max_results = min(max_results, 120)
            slist.extend(range(30, max_results, 30))

        # Exceptions raised while fetching a page propagate to the caller.
        for page in self._executor.map(_news_page, slist):
            results.extend(page)

        return list(self.islice(results, max_results))
|
|
70
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .base import DuckDuckGoBase
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DuckDuckGoSuggestions(DuckDuckGoBase):
    """DuckDuckGo query-suggestion lookup backed by the ``/ac/`` endpoint."""

    name = "duckduckgo"
    category = "suggestions"

    def run(self, *args, **kwargs) -> list[dict[str, str]]:
        """Return the suggestions DuckDuckGo offers for ``keywords``.

        Arguments may be given positionally (in the order listed) or by name.

        Args:
            keywords: Text to get suggestions for. Mandatory.
            region: Region code sent as the ``kl`` parameter
                (default ``"wt-wt"``).

        Returns:
            The decoded JSON response, materialised as a list.

        Raises:
            AssertionError: If ``keywords`` is empty.
        """
        if args:
            keywords = args[0]
        else:
            keywords = kwargs.get("keywords")

        if len(args) > 1:
            region = args[1]
        else:
            region = kwargs.get("region", "wt-wt")

        assert keywords, "keywords is mandatory"

        query = {"q": keywords, "kl": region}
        response = self._get_url("GET", "https://duckduckgo.com/ac/", params=query)
        suggestions = self.json_loads(response.content)
        return list(suggestions)
|
|
22
|
+
|