webscout 8.2.9__py3-none-any.whl → 2026.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webscout/AIauto.py +524 -251
- webscout/AIbase.py +247 -319
- webscout/AIutel.py +68 -703
- webscout/Bard.py +1072 -1026
- webscout/Extra/GitToolkit/__init__.py +10 -10
- webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
- webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
- webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
- webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
- webscout/Extra/GitToolkit/gitapi/search.py +162 -0
- webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
- webscout/Extra/GitToolkit/gitapi/user.py +128 -96
- webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
- webscout/Extra/YTToolkit/README.md +443 -375
- webscout/Extra/YTToolkit/YTdownloader.py +953 -957
- webscout/Extra/YTToolkit/__init__.py +3 -3
- webscout/Extra/YTToolkit/transcriber.py +595 -476
- webscout/Extra/YTToolkit/ytapi/README.md +230 -44
- webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
- webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
- webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
- webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
- webscout/Extra/YTToolkit/ytapi/extras.py +178 -118
- webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
- webscout/Extra/YTToolkit/ytapi/https.py +89 -88
- webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
- webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
- webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
- webscout/Extra/YTToolkit/ytapi/query.py +143 -40
- webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
- webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
- webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
- webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
- webscout/Extra/YTToolkit/ytapi/video.py +403 -232
- webscout/Extra/__init__.py +2 -3
- webscout/Extra/gguf.py +1298 -684
- webscout/Extra/tempmail/README.md +487 -487
- webscout/Extra/tempmail/__init__.py +28 -28
- webscout/Extra/tempmail/async_utils.py +143 -141
- webscout/Extra/tempmail/base.py +172 -161
- webscout/Extra/tempmail/cli.py +191 -187
- webscout/Extra/tempmail/emailnator.py +88 -84
- webscout/Extra/tempmail/mail_tm.py +378 -361
- webscout/Extra/tempmail/temp_mail_io.py +304 -292
- webscout/Extra/weather.py +196 -194
- webscout/Extra/weather_ascii.py +17 -15
- webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
- webscout/Provider/AISEARCH/Perplexity.py +292 -333
- webscout/Provider/AISEARCH/README.md +106 -279
- webscout/Provider/AISEARCH/__init__.py +16 -9
- webscout/Provider/AISEARCH/brave_search.py +298 -0
- webscout/Provider/AISEARCH/iask_search.py +357 -410
- webscout/Provider/AISEARCH/monica_search.py +200 -220
- webscout/Provider/AISEARCH/webpilotai_search.py +242 -255
- webscout/Provider/Algion.py +413 -0
- webscout/Provider/Andi.py +74 -69
- webscout/Provider/Apriel.py +313 -0
- webscout/Provider/Ayle.py +323 -0
- webscout/Provider/ChatSandbox.py +329 -342
- webscout/Provider/ClaudeOnline.py +365 -0
- webscout/Provider/Cohere.py +232 -208
- webscout/Provider/DeepAI.py +367 -0
- webscout/Provider/Deepinfra.py +467 -340
- webscout/Provider/EssentialAI.py +217 -0
- webscout/Provider/ExaAI.py +274 -261
- webscout/Provider/Gemini.py +175 -169
- webscout/Provider/GithubChat.py +385 -369
- webscout/Provider/Gradient.py +286 -0
- webscout/Provider/Groq.py +556 -801
- webscout/Provider/HadadXYZ.py +323 -0
- webscout/Provider/HeckAI.py +392 -375
- webscout/Provider/HuggingFace.py +387 -0
- webscout/Provider/IBM.py +340 -0
- webscout/Provider/Jadve.py +317 -291
- webscout/Provider/K2Think.py +306 -0
- webscout/Provider/Koboldai.py +221 -384
- webscout/Provider/Netwrck.py +273 -270
- webscout/Provider/Nvidia.py +310 -0
- webscout/Provider/OPENAI/DeepAI.py +489 -0
- webscout/Provider/OPENAI/K2Think.py +423 -0
- webscout/Provider/OPENAI/PI.py +463 -0
- webscout/Provider/OPENAI/README.md +890 -952
- webscout/Provider/OPENAI/TogetherAI.py +405 -0
- webscout/Provider/OPENAI/TwoAI.py +255 -357
- webscout/Provider/OPENAI/__init__.py +148 -40
- webscout/Provider/OPENAI/ai4chat.py +348 -293
- webscout/Provider/OPENAI/akashgpt.py +436 -0
- webscout/Provider/OPENAI/algion.py +303 -0
- webscout/Provider/OPENAI/{exachat.py → ayle.py} +365 -444
- webscout/Provider/OPENAI/base.py +253 -249
- webscout/Provider/OPENAI/cerebras.py +296 -0
- webscout/Provider/OPENAI/chatgpt.py +870 -556
- webscout/Provider/OPENAI/chatsandbox.py +233 -173
- webscout/Provider/OPENAI/deepinfra.py +403 -322
- webscout/Provider/OPENAI/e2b.py +2370 -1414
- webscout/Provider/OPENAI/elmo.py +278 -0
- webscout/Provider/OPENAI/exaai.py +452 -417
- webscout/Provider/OPENAI/freeassist.py +446 -0
- webscout/Provider/OPENAI/gradient.py +448 -0
- webscout/Provider/OPENAI/groq.py +380 -364
- webscout/Provider/OPENAI/hadadxyz.py +292 -0
- webscout/Provider/OPENAI/heckai.py +333 -308
- webscout/Provider/OPENAI/huggingface.py +321 -0
- webscout/Provider/OPENAI/ibm.py +425 -0
- webscout/Provider/OPENAI/llmchat.py +253 -0
- webscout/Provider/OPENAI/llmchatco.py +378 -335
- webscout/Provider/OPENAI/meta.py +541 -0
- webscout/Provider/OPENAI/netwrck.py +374 -357
- webscout/Provider/OPENAI/nvidia.py +317 -0
- webscout/Provider/OPENAI/oivscode.py +348 -287
- webscout/Provider/OPENAI/openrouter.py +328 -0
- webscout/Provider/OPENAI/pydantic_imports.py +1 -172
- webscout/Provider/OPENAI/sambanova.py +397 -0
- webscout/Provider/OPENAI/sonus.py +305 -304
- webscout/Provider/OPENAI/textpollinations.py +370 -339
- webscout/Provider/OPENAI/toolbaz.py +375 -413
- webscout/Provider/OPENAI/typefully.py +419 -355
- webscout/Provider/OPENAI/typliai.py +279 -0
- webscout/Provider/OPENAI/utils.py +314 -318
- webscout/Provider/OPENAI/wisecat.py +359 -387
- webscout/Provider/OPENAI/writecream.py +185 -163
- webscout/Provider/OPENAI/x0gpt.py +462 -365
- webscout/Provider/OPENAI/zenmux.py +380 -0
- webscout/Provider/OpenRouter.py +386 -0
- webscout/Provider/Openai.py +337 -496
- webscout/Provider/PI.py +443 -429
- webscout/Provider/QwenLM.py +346 -254
- webscout/Provider/STT/__init__.py +28 -0
- webscout/Provider/STT/base.py +303 -0
- webscout/Provider/STT/elevenlabs.py +264 -0
- webscout/Provider/Sambanova.py +317 -0
- webscout/Provider/TTI/README.md +69 -82
- webscout/Provider/TTI/__init__.py +37 -7
- webscout/Provider/TTI/base.py +147 -64
- webscout/Provider/TTI/claudeonline.py +393 -0
- webscout/Provider/TTI/magicstudio.py +292 -201
- webscout/Provider/TTI/miragic.py +180 -0
- webscout/Provider/TTI/pollinations.py +331 -221
- webscout/Provider/TTI/together.py +334 -0
- webscout/Provider/TTI/utils.py +14 -11
- webscout/Provider/TTS/README.md +186 -192
- webscout/Provider/TTS/__init__.py +43 -10
- webscout/Provider/TTS/base.py +523 -159
- webscout/Provider/TTS/deepgram.py +286 -156
- webscout/Provider/TTS/elevenlabs.py +189 -111
- webscout/Provider/TTS/freetts.py +218 -0
- webscout/Provider/TTS/murfai.py +288 -113
- webscout/Provider/TTS/openai_fm.py +364 -129
- webscout/Provider/TTS/parler.py +203 -111
- webscout/Provider/TTS/qwen.py +334 -0
- webscout/Provider/TTS/sherpa.py +286 -0
- webscout/Provider/TTS/speechma.py +693 -580
- webscout/Provider/TTS/streamElements.py +275 -333
- webscout/Provider/TTS/utils.py +280 -280
- webscout/Provider/TextPollinationsAI.py +331 -308
- webscout/Provider/TogetherAI.py +450 -0
- webscout/Provider/TwoAI.py +309 -475
- webscout/Provider/TypliAI.py +311 -305
- webscout/Provider/UNFINISHED/ChatHub.py +219 -209
- webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +331 -326
- webscout/Provider/{GizAI.py → UNFINISHED/GizAI.py} +300 -295
- webscout/Provider/{Marcus.py → UNFINISHED/Marcus.py} +218 -198
- webscout/Provider/UNFINISHED/Qodo.py +481 -0
- webscout/Provider/{MCPCore.py → UNFINISHED/XenAI.py} +330 -315
- webscout/Provider/UNFINISHED/Youchat.py +347 -330
- webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
- webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
- webscout/Provider/UNFINISHED/liner.py +342 -0
- webscout/Provider/UNFINISHED/liner_api_request.py +246 -263
- webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +231 -224
- webscout/Provider/WiseCat.py +256 -233
- webscout/Provider/WrDoChat.py +390 -370
- webscout/Provider/__init__.py +115 -174
- webscout/Provider/ai4chat.py +181 -174
- webscout/Provider/akashgpt.py +330 -335
- webscout/Provider/cerebras.py +397 -290
- webscout/Provider/cleeai.py +236 -213
- webscout/Provider/elmo.py +291 -283
- webscout/Provider/geminiapi.py +343 -208
- webscout/Provider/julius.py +245 -223
- webscout/Provider/learnfastai.py +333 -325
- webscout/Provider/llama3mitril.py +230 -215
- webscout/Provider/llmchat.py +308 -258
- webscout/Provider/llmchatco.py +321 -306
- webscout/Provider/meta.py +996 -801
- webscout/Provider/oivscode.py +332 -309
- webscout/Provider/searchchat.py +316 -292
- webscout/Provider/sonus.py +264 -258
- webscout/Provider/toolbaz.py +359 -353
- webscout/Provider/turboseek.py +332 -266
- webscout/Provider/typefully.py +262 -202
- webscout/Provider/x0gpt.py +332 -299
- webscout/__init__.py +31 -39
- webscout/__main__.py +5 -5
- webscout/cli.py +585 -524
- webscout/client.py +1497 -70
- webscout/conversation.py +140 -436
- webscout/exceptions.py +383 -362
- webscout/litagent/__init__.py +29 -29
- webscout/litagent/agent.py +492 -455
- webscout/litagent/constants.py +60 -60
- webscout/models.py +505 -181
- webscout/optimizers.py +74 -420
- webscout/prompt_manager.py +376 -288
- webscout/sanitize.py +1514 -0
- webscout/scout/README.md +452 -404
- webscout/scout/__init__.py +8 -8
- webscout/scout/core/__init__.py +7 -7
- webscout/scout/core/crawler.py +330 -210
- webscout/scout/core/scout.py +800 -607
- webscout/scout/core/search_result.py +51 -96
- webscout/scout/core/text_analyzer.py +64 -63
- webscout/scout/core/text_utils.py +412 -277
- webscout/scout/core/web_analyzer.py +54 -52
- webscout/scout/element.py +872 -478
- webscout/scout/parsers/__init__.py +70 -69
- webscout/scout/parsers/html5lib_parser.py +182 -172
- webscout/scout/parsers/html_parser.py +238 -236
- webscout/scout/parsers/lxml_parser.py +203 -178
- webscout/scout/utils.py +38 -37
- webscout/search/__init__.py +47 -0
- webscout/search/base.py +201 -0
- webscout/search/bing_main.py +45 -0
- webscout/search/brave_main.py +92 -0
- webscout/search/duckduckgo_main.py +57 -0
- webscout/search/engines/__init__.py +127 -0
- webscout/search/engines/bing/__init__.py +15 -0
- webscout/search/engines/bing/base.py +35 -0
- webscout/search/engines/bing/images.py +114 -0
- webscout/search/engines/bing/news.py +96 -0
- webscout/search/engines/bing/suggestions.py +36 -0
- webscout/search/engines/bing/text.py +109 -0
- webscout/search/engines/brave/__init__.py +19 -0
- webscout/search/engines/brave/base.py +47 -0
- webscout/search/engines/brave/images.py +213 -0
- webscout/search/engines/brave/news.py +353 -0
- webscout/search/engines/brave/suggestions.py +318 -0
- webscout/search/engines/brave/text.py +167 -0
- webscout/search/engines/brave/videos.py +364 -0
- webscout/search/engines/duckduckgo/__init__.py +25 -0
- webscout/search/engines/duckduckgo/answers.py +80 -0
- webscout/search/engines/duckduckgo/base.py +189 -0
- webscout/search/engines/duckduckgo/images.py +100 -0
- webscout/search/engines/duckduckgo/maps.py +183 -0
- webscout/search/engines/duckduckgo/news.py +70 -0
- webscout/search/engines/duckduckgo/suggestions.py +22 -0
- webscout/search/engines/duckduckgo/text.py +221 -0
- webscout/search/engines/duckduckgo/translate.py +48 -0
- webscout/search/engines/duckduckgo/videos.py +80 -0
- webscout/search/engines/duckduckgo/weather.py +84 -0
- webscout/search/engines/mojeek.py +61 -0
- webscout/search/engines/wikipedia.py +77 -0
- webscout/search/engines/yahoo/__init__.py +41 -0
- webscout/search/engines/yahoo/answers.py +19 -0
- webscout/search/engines/yahoo/base.py +34 -0
- webscout/search/engines/yahoo/images.py +323 -0
- webscout/search/engines/yahoo/maps.py +19 -0
- webscout/search/engines/yahoo/news.py +258 -0
- webscout/search/engines/yahoo/suggestions.py +140 -0
- webscout/search/engines/yahoo/text.py +273 -0
- webscout/search/engines/yahoo/translate.py +19 -0
- webscout/search/engines/yahoo/videos.py +302 -0
- webscout/search/engines/yahoo/weather.py +220 -0
- webscout/search/engines/yandex.py +67 -0
- webscout/search/engines/yep/__init__.py +13 -0
- webscout/search/engines/yep/base.py +34 -0
- webscout/search/engines/yep/images.py +101 -0
- webscout/search/engines/yep/suggestions.py +38 -0
- webscout/search/engines/yep/text.py +99 -0
- webscout/search/http_client.py +172 -0
- webscout/search/results.py +141 -0
- webscout/search/yahoo_main.py +57 -0
- webscout/search/yep_main.py +48 -0
- webscout/server/__init__.py +48 -0
- webscout/server/config.py +78 -0
- webscout/server/exceptions.py +69 -0
- webscout/server/providers.py +286 -0
- webscout/server/request_models.py +131 -0
- webscout/server/request_processing.py +404 -0
- webscout/server/routes.py +642 -0
- webscout/server/server.py +351 -0
- webscout/server/ui_templates.py +1171 -0
- webscout/swiftcli/__init__.py +79 -95
- webscout/swiftcli/core/__init__.py +7 -7
- webscout/swiftcli/core/cli.py +574 -297
- webscout/swiftcli/core/context.py +98 -104
- webscout/swiftcli/core/group.py +268 -241
- webscout/swiftcli/decorators/__init__.py +28 -28
- webscout/swiftcli/decorators/command.py +243 -221
- webscout/swiftcli/decorators/options.py +247 -220
- webscout/swiftcli/decorators/output.py +392 -252
- webscout/swiftcli/exceptions.py +21 -21
- webscout/swiftcli/plugins/__init__.py +9 -9
- webscout/swiftcli/plugins/base.py +134 -135
- webscout/swiftcli/plugins/manager.py +269 -269
- webscout/swiftcli/utils/__init__.py +58 -59
- webscout/swiftcli/utils/formatting.py +251 -252
- webscout/swiftcli/utils/parsing.py +368 -267
- webscout/update_checker.py +280 -136
- webscout/utils.py +28 -14
- webscout/version.py +2 -1
- webscout/version.py.bak +3 -0
- webscout/zeroart/__init__.py +218 -135
- webscout/zeroart/base.py +70 -66
- webscout/zeroart/effects.py +155 -101
- webscout/zeroart/fonts.py +1799 -1239
- webscout-2026.1.19.dist-info/METADATA +638 -0
- webscout-2026.1.19.dist-info/RECORD +312 -0
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/entry_points.txt +1 -1
- webscout/DWEBS.py +0 -520
- webscout/Extra/Act.md +0 -309
- webscout/Extra/GitToolkit/gitapi/README.md +0 -110
- webscout/Extra/autocoder/__init__.py +0 -9
- webscout/Extra/autocoder/autocoder.py +0 -1105
- webscout/Extra/autocoder/autocoder_utiles.py +0 -332
- webscout/Extra/gguf.md +0 -430
- webscout/Extra/weather.md +0 -281
- webscout/Litlogger/README.md +0 -10
- webscout/Litlogger/__init__.py +0 -15
- webscout/Litlogger/formats.py +0 -4
- webscout/Litlogger/handlers.py +0 -103
- webscout/Litlogger/levels.py +0 -13
- webscout/Litlogger/logger.py +0 -92
- webscout/Provider/AI21.py +0 -177
- webscout/Provider/AISEARCH/DeepFind.py +0 -254
- webscout/Provider/AISEARCH/felo_search.py +0 -202
- webscout/Provider/AISEARCH/genspark_search.py +0 -324
- webscout/Provider/AISEARCH/hika_search.py +0 -186
- webscout/Provider/AISEARCH/scira_search.py +0 -298
- webscout/Provider/Aitopia.py +0 -316
- webscout/Provider/AllenAI.py +0 -440
- webscout/Provider/Blackboxai.py +0 -791
- webscout/Provider/ChatGPTClone.py +0 -237
- webscout/Provider/ChatGPTGratis.py +0 -194
- webscout/Provider/Cloudflare.py +0 -324
- webscout/Provider/ExaChat.py +0 -358
- webscout/Provider/Flowith.py +0 -217
- webscout/Provider/FreeGemini.py +0 -250
- webscout/Provider/Glider.py +0 -225
- webscout/Provider/HF_space/__init__.py +0 -0
- webscout/Provider/HF_space/qwen_qwen2.py +0 -206
- webscout/Provider/HuggingFaceChat.py +0 -469
- webscout/Provider/Hunyuan.py +0 -283
- webscout/Provider/LambdaChat.py +0 -411
- webscout/Provider/Llama3.py +0 -259
- webscout/Provider/Nemotron.py +0 -218
- webscout/Provider/OLLAMA.py +0 -396
- webscout/Provider/OPENAI/BLACKBOXAI.py +0 -766
- webscout/Provider/OPENAI/Cloudflare.py +0 -378
- webscout/Provider/OPENAI/FreeGemini.py +0 -283
- webscout/Provider/OPENAI/NEMOTRON.py +0 -232
- webscout/Provider/OPENAI/Qwen3.py +0 -283
- webscout/Provider/OPENAI/api.py +0 -969
- webscout/Provider/OPENAI/c4ai.py +0 -373
- webscout/Provider/OPENAI/chatgptclone.py +0 -494
- webscout/Provider/OPENAI/copilot.py +0 -242
- webscout/Provider/OPENAI/flowith.py +0 -162
- webscout/Provider/OPENAI/freeaichat.py +0 -359
- webscout/Provider/OPENAI/mcpcore.py +0 -389
- webscout/Provider/OPENAI/multichat.py +0 -376
- webscout/Provider/OPENAI/opkfc.py +0 -496
- webscout/Provider/OPENAI/scirachat.py +0 -477
- webscout/Provider/OPENAI/standardinput.py +0 -433
- webscout/Provider/OPENAI/typegpt.py +0 -364
- webscout/Provider/OPENAI/uncovrAI.py +0 -463
- webscout/Provider/OPENAI/venice.py +0 -431
- webscout/Provider/OPENAI/yep.py +0 -382
- webscout/Provider/OpenGPT.py +0 -209
- webscout/Provider/Perplexitylabs.py +0 -415
- webscout/Provider/Reka.py +0 -214
- webscout/Provider/StandardInput.py +0 -290
- webscout/Provider/TTI/aiarta.py +0 -365
- webscout/Provider/TTI/artbit.py +0 -0
- webscout/Provider/TTI/fastflux.py +0 -200
- webscout/Provider/TTI/piclumen.py +0 -203
- webscout/Provider/TTI/pixelmuse.py +0 -225
- webscout/Provider/TTS/gesserit.py +0 -128
- webscout/Provider/TTS/sthir.py +0 -94
- webscout/Provider/TeachAnything.py +0 -229
- webscout/Provider/UNFINISHED/puterjs.py +0 -635
- webscout/Provider/UNFINISHED/test_lmarena.py +0 -119
- webscout/Provider/Venice.py +0 -258
- webscout/Provider/VercelAI.py +0 -253
- webscout/Provider/Writecream.py +0 -246
- webscout/Provider/WritingMate.py +0 -269
- webscout/Provider/asksteve.py +0 -220
- webscout/Provider/chatglm.py +0 -215
- webscout/Provider/copilot.py +0 -425
- webscout/Provider/freeaichat.py +0 -285
- webscout/Provider/granite.py +0 -235
- webscout/Provider/hermes.py +0 -266
- webscout/Provider/koala.py +0 -170
- webscout/Provider/lmarena.py +0 -198
- webscout/Provider/multichat.py +0 -364
- webscout/Provider/scira_chat.py +0 -299
- webscout/Provider/scnet.py +0 -243
- webscout/Provider/talkai.py +0 -194
- webscout/Provider/typegpt.py +0 -289
- webscout/Provider/uncovr.py +0 -368
- webscout/Provider/yep.py +0 -389
- webscout/litagent/Readme.md +0 -276
- webscout/litprinter/__init__.py +0 -59
- webscout/swiftcli/Readme.md +0 -323
- webscout/tempid.py +0 -128
- webscout/webscout_search.py +0 -1184
- webscout/webscout_search_async.py +0 -654
- webscout/yep_search.py +0 -347
- webscout/zeroart/README.md +0 -89
- webscout-8.2.9.dist-info/METADATA +0 -1033
- webscout-8.2.9.dist-info/RECORD +0 -289
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""Yahoo video search engine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from typing import Any, Optional
|
|
7
|
+
from urllib.parse import parse_qs, urlparse
|
|
8
|
+
|
|
9
|
+
from ...results import VideosResult
|
|
10
|
+
from .base import YahooSearchEngine
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class YahooVideos(YahooSearchEngine[VideosResult]):
    """Yahoo video search engine with filters.

    Features:
    - Length filters (short, medium, long)
    - Resolution filters (SD, HD, 4K)
    - Source filters
    - Time filters
    - Pagination support
    """

    name = "yahoo"
    category = "videos"

    search_url = "https://video.search.yahoo.com/search/video"
    search_method = "GET"

    # XPath selectors for video results
    items_xpath = "//div[@id='results']//div[contains(@class, 'dd') or contains(@class, 'vr')]"
    elements_xpath: Mapping[str, str] = {
        "title": ".//h3//a/text() | .//a/@title",
        "url": ".//h3//a/@href | .//a/@href",
        "thumbnail": ".//img/@src",
        "duration": ".//span[contains(@class, 'time') or contains(@class, 'duration')]//text()",
        "views": ".//span[contains(@class, 'views')]//text()",
        "published": ".//span[contains(@class, 'date') or contains(@class, 'age')]//text()",
        "source": ".//span[contains(@class, 'source')]//text()",
    }

    # Filter mappings: public filter name -> Yahoo query-string value
    # (empty string means "no filter" and is skipped when building the payload).
    LENGTH_FILTERS = {
        "short": "short",    # < 4 minutes
        "medium": "medium",  # 4-20 minutes
        "long": "long",      # > 20 minutes
        "all": "",
    }

    RESOLUTION_FILTERS = {
        "sd": "sd",
        "hd": "hd",
        "4k": "4k",
        "all": "",
    }

    SOURCE_FILTERS = {
        "youtube": "youtube",
        "dailymotion": "dailymotion",
        "vimeo": "vimeo",
        "metacafe": "metacafe",
        "all": "",
    }

    def build_payload(
        self,
        query: str,
        region: str,
        safesearch: str,
        timelimit: str | None,
        page: int = 1,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Build video search payload.

        Args:
            query: Search query
            region: Region code (currently not encoded into the payload)
            safesearch: Safe search level ("on", "off", or "moderate";
                "moderate" uses Yahoo's default and sets no parameter)
            timelimit: Time filter (d, w, m, y)
            page: Page number (1-based)
            **kwargs: Additional filters:
                - length: Video length filter (key of LENGTH_FILTERS)
                - resolution: Video resolution filter (key of RESOLUTION_FILTERS)
                - source: Video source filter (key of SOURCE_FILTERS)
            Unknown kwargs are ignored.

        Returns:
            Query parameters dictionary
        """
        payload: dict[str, Any] = {
            "p": query,
            "fr": "sfp",
            "fr2": "p:s,v:v,m:sb,rgn:top",
            "ei": "UTF-8",
        }

        # Pagination: "b" is the 1-based index of the first result to show.
        if page > 1:
            # Each page shows ~15-20 videos
            payload["b"] = f"{(page - 1) * 15 + 1}"

        # Safe search ("moderate" intentionally sets nothing).
        if safesearch == "on":
            payload["safe"] = "active"
        elif safesearch == "off":
            payload["safe"] = "off"

        # Time filter
        if timelimit:
            time_map = {
                "d": "1d",
                "w": "1w",
                "m": "1m",
                "y": "1y",
            }
            if timelimit in time_map:
                payload["age"] = time_map[timelimit]

        # Length filter ("all" maps to "" and is skipped).
        if "length" in kwargs and kwargs["length"] in self.LENGTH_FILTERS:
            length_val = self.LENGTH_FILTERS[kwargs["length"]]
            if length_val:
                payload["vidlen"] = length_val

        # Resolution filter
        if "resolution" in kwargs and kwargs["resolution"] in self.RESOLUTION_FILTERS:
            res_val = self.RESOLUTION_FILTERS[kwargs["resolution"]]
            if res_val:
                payload["vidqual"] = res_val

        # Source filter
        if "source" in kwargs and kwargs["source"] in self.SOURCE_FILTERS:
            source_val = self.SOURCE_FILTERS[kwargs["source"]]
            if source_val:
                payload["site"] = source_val

        return payload

    def extract_video_url(self, href: str) -> str:
        """Extract actual video URL from Yahoo redirect.

        Yahoo wraps result links either as ``r.search.yahoo.com/...`` paths
        carrying the destination in a ``/RU=...`` segment, or as plain
        redirects with a ``url=`` query parameter.

        Args:
            href: Yahoo redirect URL

        Returns:
            Actual video URL, or *href* unchanged when it is empty, not a
            recognized redirect, or cannot be parsed.
        """
        if not href:
            return href

        try:
            parsed = urlparse(href)

            # Check if it's a Yahoo redirect wrapper.
            if "r.search.yahoo.com" in parsed.netloc or "/RU=" in href:
                if "/RU=" in href:
                    # Destination is percent-encoded between /RU= and /RK=.
                    start = href.find("/RU=") + 4
                    end = href.find("/RK=", start)
                    if end == -1:
                        end = len(href)
                    from urllib.parse import unquote
                    return unquote(href[start:end])
                else:
                    query_params = parse_qs(parsed.query)
                    if "url" in query_params:
                        return query_params["url"][0]

            return href
        except Exception:
            # Best-effort: a malformed URL is returned as-is rather than lost.
            return href

    def post_extract_results(self, results: list[VideosResult]) -> list[VideosResult]:
        """Post-process video results.

        Resolves Yahoo redirect URLs, drops entries with no URL or title,
        and clears inline ``data:`` thumbnails (placeholder images).

        Args:
            results: Raw extracted results

        Returns:
            Cleaned results
        """
        cleaned_results = []

        for result in results:
            # Resolve the Yahoo redirect to the real video URL.
            if result.url:
                result.url = self.extract_video_url(result.url)

            # Skip invalid results
            if not result.url or not result.title:
                continue

            # Inline data: URIs are lazy-load placeholders, not thumbnails.
            if result.thumbnail and result.thumbnail.startswith("data:"):
                result.thumbnail = ""

            cleaned_results.append(result)

        return cleaned_results

    def search(
        self,
        query: str,
        region: str = "us-en",
        safesearch: str = "moderate",
        timelimit: str | None = None,
        page: int = 1,
        max_results: int | None = None,
        **kwargs: Any,
    ) -> list[VideosResult] | None:
        """Search Yahoo Videos with pagination.

        Args:
            query: Video search query
            region: Region code
            safesearch: Safe search level
            timelimit: Time filter
            page: Starting page
            max_results: Maximum results
            **kwargs: Additional filters (length, resolution, source) and
                ``max_pages`` (default 5) to cap pagination.

        Returns:
            List of VideosResult objects, or None when nothing was found.
        """
        results = []
        current_page = page
        max_pages = kwargs.get("max_pages", 5)

        while current_page <= max_pages:
            payload = self.build_payload(
                query=query,
                region=region,
                safesearch=safesearch,
                timelimit=timelimit,
                page=current_page,
                **kwargs
            )

            html_text = self.request(self.search_method, self.search_url, params=payload)
            if not html_text:
                break

            html_text = self.pre_process_html(html_text)
            page_results = self.extract_results(html_text)

            # An empty page means we ran off the end of the result set.
            if not page_results:
                break

            results.extend(page_results)

            if max_results and len(results) >= max_results:
                break

            current_page += 1

        results = self.post_extract_results(results)

        if max_results:
            results = results[:max_results]

        return results if results else None

    def run(
        self,
        keywords: str,
        region: str = "us-en",
        safesearch: str = "moderate",
        timelimit: str | None = None,
        resolution: str | None = None,
        duration: str | None = None,
        license_videos: str | None = None,
        max_results: int | None = None,
    ) -> list[dict[str, str]]:
        """Run video search and return results as dictionaries.

        Args:
            keywords: Search query.
            region: Region code.
            safesearch: Safe search level.
            timelimit: Time filter.
            resolution: Video resolution filter (key of RESOLUTION_FILTERS).
            duration: Video duration filter (key of LENGTH_FILTERS:
                "short", "medium", "long", "all").
            license_videos: License filter. Accepted for interface
                compatibility; Yahoo exposes no license parameter, so it is
                ignored by build_payload.
            max_results: Maximum number of results.

        Returns:
            List of video result dictionaries.
        """
        results = self.search(
            query=keywords,
            region=region,
            safesearch=safesearch,
            timelimit=timelimit,
            resolution=resolution,
            # BUGFIX: build_payload reads the "length" kwarg, so the public
            # `duration` parameter must be forwarded under that name —
            # previously it was passed as `duration` and silently ignored.
            length=duration,
            license_videos=license_videos,
            max_results=max_results,
        )
        if results is None:
            return []
        return [result.to_dict() for result in results]
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Yahoo weather search using embedded JSON data."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any, Dict, Optional
|
|
7
|
+
|
|
8
|
+
from ...http_client import HttpClient
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class YahooWeather:
|
|
12
|
+
"""Yahoo weather search using embedded JSON extraction."""
|
|
13
|
+
|
|
14
|
+
def __init__(self, proxy: str | None = None, timeout: int | None = None, verify: bool = True):
    """Set up the Yahoo weather scraper.

    Args:
        proxy: Optional proxy URL to route requests through.
        timeout: Per-request timeout in seconds (None for the client default).
        verify: Whether SSL certificates should be verified.
    """
    # All network access for this engine goes through one shared HTTP client.
    client = HttpClient(proxy=proxy, timeout=timeout, verify=verify)
    self.http_client = client
|
|
23
|
+
|
|
24
|
+
def request(self, method: str, url: str, **kwargs: Any) -> str | None:
    """Make a request to the weather service.

    Args:
        method: HTTP method name (e.g. "GET").
        url: Target URL.
        **kwargs: Extra arguments forwarded to the underlying HTTP client.

    Returns:
        Response body text, or None when the request fails for any reason.
    """
    # BUGFIX: hoisted out of the try block — importing inside the guarded
    # region made an ImportError indistinguishable from a network failure.
    from typing import cast

    try:
        # cast() is a runtime no-op; it only satisfies the type checker
        # about the literal-typed `method` parameter of HttpClient.request.
        response = self.http_client.request(cast(Any, method), url, **kwargs)
        return response.text
    except Exception:
        # Best-effort contract: callers treat None as "no data".
        return None
|
|
32
|
+
|
|
33
|
+
def run(self, *args, **kwargs) -> list[dict[str, Any]]:
    """Get weather data from Yahoo.

    Args:
        *args: First positional argument is the location, if given.
        **kwargs: Alternatively, ``location`` or ``keywords`` names the
            location (e.g., "New York", "London", "Bengaluru").

    Returns:
        List of weather data dictionaries. On failure a single dictionary
        with "location" and "error" keys is returned instead of raising.

    Raises:
        ValueError: If no location was supplied at all.
    """
    from urllib.parse import quote_plus

    location = args[0] if args else kwargs.get("location") or kwargs.get("keywords")

    if not location:
        raise ValueError("Location is required for weather search")

    try:
        # Use the search endpoint which redirects to the correct weather page.
        # BUGFIX: quote_plus() percent-encodes every reserved character
        # (&, ?, #, non-ASCII), not just spaces as the old
        # `.replace(' ', '+')` did, so arbitrary locations form a valid URL;
        # spaces still become '+' exactly as before.
        search_url = f"https://weather.yahoo.com/search/?q={quote_plus(location)}"

        # Fetch the page
        response = self.request("GET", search_url)
        if not response:
            return [{
                "location": location,
                "error": "Failed to fetch weather data from Yahoo"
            }]

        # Extract JSON data from the page
        weather_data = self._extract_json_data(response, location)

        if weather_data:
            return [weather_data]

        # Fallback: try regex parsing
        return self._parse_weather_html(response, location)

    except Exception as e:
        return [{
            "location": location,
            "error": f"Failed to fetch weather data: {str(e)}"
        }]
|
|
73
|
+
|
|
74
|
+
def _extract_json_data(self, html: str, location: str) -> dict[str, Any] | None:
|
|
75
|
+
"""Extract weather data from embedded JSON in the page.
|
|
76
|
+
|
|
77
|
+
Yahoo Weather embeds JSON data in script tags that can be parsed.
|
|
78
|
+
"""
|
|
79
|
+
try:
|
|
80
|
+
# Look for the main data script tag
|
|
81
|
+
# Pattern: self.__next_f.push([1,"..JSON data.."])
|
|
82
|
+
json_pattern = r'self\.__next_f\.push\(\[1,"([^"]+)"\]\)'
|
|
83
|
+
matches = re.findall(json_pattern, html)
|
|
84
|
+
|
|
85
|
+
weather_info: Dict[str, Any] = {}
|
|
86
|
+
|
|
87
|
+
for match in matches:
|
|
88
|
+
# Unescape the JSON string
|
|
89
|
+
try:
|
|
90
|
+
# The data is escaped, so we need to decode it
|
|
91
|
+
decoded = match.encode().decode('unicode_escape')
|
|
92
|
+
|
|
93
|
+
# Look for temperature data
|
|
94
|
+
temp_match = re.search(r'"temperature":(\d+)', decoded)
|
|
95
|
+
if temp_match and not weather_info.get('temperature'):
|
|
96
|
+
weather_info['temperature'] = int(temp_match.group(1))
|
|
97
|
+
|
|
98
|
+
# Look for condition
|
|
99
|
+
condition_match = re.search(r'"iconLabel":"([^"]+)"', decoded)
|
|
100
|
+
if condition_match and not weather_info.get('condition'):
|
|
101
|
+
weather_info['condition'] = condition_match.group(1)
|
|
102
|
+
|
|
103
|
+
# Look for high/low
|
|
104
|
+
high_match = re.search(r'"highTemperature":(\d+)', decoded)
|
|
105
|
+
if high_match and not weather_info.get('high'):
|
|
106
|
+
weather_info['high'] = int(high_match.group(1))
|
|
107
|
+
|
|
108
|
+
low_match = re.search(r'"lowTemperature":(\d+)', decoded)
|
|
109
|
+
if low_match and not weather_info.get('low'):
|
|
110
|
+
weather_info['low'] = int(low_match.group(1))
|
|
111
|
+
|
|
112
|
+
# Look for humidity
|
|
113
|
+
humidity_match = re.search(r'"value":"(\d+)%"[^}]*"category":"Humidity"', decoded)
|
|
114
|
+
if humidity_match and not weather_info.get('humidity'):
|
|
115
|
+
weather_info['humidity'] = int(humidity_match.group(1))
|
|
116
|
+
|
|
117
|
+
# Look for precipitation probability
|
|
118
|
+
precip_match = re.search(r'"probabilityOfPrecipitation":"(\d+)%"', decoded)
|
|
119
|
+
if precip_match and not weather_info.get('precipitation_chance'):
|
|
120
|
+
weather_info['precipitation_chance'] = int(precip_match.group(1))
|
|
121
|
+
|
|
122
|
+
# Look for location name
|
|
123
|
+
location_match = re.search(r'"name":"([^"]+)","code":null,"woeid":(\d+)', decoded)
|
|
124
|
+
if location_match and not weather_info.get('location_name'):
|
|
125
|
+
weather_info['location_name'] = location_match.group(1)
|
|
126
|
+
weather_info['woeid'] = int(location_match.group(2))
|
|
127
|
+
|
|
128
|
+
except Exception:
|
|
129
|
+
continue
|
|
130
|
+
|
|
131
|
+
if weather_info and weather_info.get('temperature'):
|
|
132
|
+
return {
|
|
133
|
+
"location": weather_info.get('location_name', location),
|
|
134
|
+
"woeid": weather_info.get('woeid'),
|
|
135
|
+
"temperature_f": weather_info.get('temperature'),
|
|
136
|
+
"condition": weather_info.get('condition'),
|
|
137
|
+
"high_f": weather_info.get('high'),
|
|
138
|
+
"low_f": weather_info.get('low'),
|
|
139
|
+
"humidity_percent": weather_info.get('humidity'),
|
|
140
|
+
"precipitation_chance": weather_info.get('precipitation_chance'),
|
|
141
|
+
"source": "Yahoo Weather",
|
|
142
|
+
"units": "Fahrenheit"
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
return None
|
|
146
|
+
|
|
147
|
+
except Exception:
|
|
148
|
+
return None
|
|
149
|
+
|
|
150
|
+
def _parse_weather_html(self, html_content: str, location: str) -> list[dict[str, Any]]:
|
|
151
|
+
"""Fallback: Parse weather data from HTML content using regex.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
html_content: HTML content of weather page
|
|
155
|
+
location: Location name
|
|
156
|
+
|
|
157
|
+
Returns:
|
|
158
|
+
List of weather data dictionaries
|
|
159
|
+
"""
|
|
160
|
+
try:
|
|
161
|
+
weather_data: dict[str, Any] = {"location": location}
|
|
162
|
+
|
|
163
|
+
# Extract current temperature
|
|
164
|
+
temp_patterns = [
|
|
165
|
+
r'<p[^>]*class="[^"]*font-title1[^"]*"[^>]*>(\d+)°</p>',
|
|
166
|
+
r'>(\d+)°<',
|
|
167
|
+
r'"temperature":(\d+)',
|
|
168
|
+
]
|
|
169
|
+
|
|
170
|
+
for pattern in temp_patterns:
|
|
171
|
+
match = re.search(pattern, html_content)
|
|
172
|
+
if match:
|
|
173
|
+
weather_data["temperature_f"] = int(match.group(1))
|
|
174
|
+
break
|
|
175
|
+
|
|
176
|
+
# Extract condition
|
|
177
|
+
condition_patterns = [
|
|
178
|
+
r'"iconLabel":"([^"]+)"',
|
|
179
|
+
r'aria-label="([^"]*(?:Cloudy|Sunny|Rain|Clear|Thunder|Shower|Fog)[^"]*)"',
|
|
180
|
+
]
|
|
181
|
+
|
|
182
|
+
for pattern in condition_patterns:
|
|
183
|
+
match = re.search(pattern, html_content, re.IGNORECASE)
|
|
184
|
+
if match:
|
|
185
|
+
weather_data["condition"] = match.group(1)
|
|
186
|
+
break
|
|
187
|
+
|
|
188
|
+
# Extract high/low
|
|
189
|
+
high_match = re.search(r'"highTemperature":(\d+)', html_content)
|
|
190
|
+
if high_match:
|
|
191
|
+
weather_data["high_f"] = int(high_match.group(1))
|
|
192
|
+
|
|
193
|
+
low_match = re.search(r'"lowTemperature":(\d+)', html_content)
|
|
194
|
+
if low_match:
|
|
195
|
+
weather_data["low_f"] = int(low_match.group(1))
|
|
196
|
+
|
|
197
|
+
# Extract humidity
|
|
198
|
+
humidity_match = re.search(r'Humidity[^>]*>(\d+)%|"value":"(\d+)%"[^}]*"Humidity"', html_content, re.IGNORECASE)
|
|
199
|
+
if humidity_match:
|
|
200
|
+
weather_data["humidity_percent"] = int(humidity_match.group(1) or humidity_match.group(2))
|
|
201
|
+
|
|
202
|
+
weather_data["source"] = "Yahoo Weather"
|
|
203
|
+
weather_data["units"] = "Fahrenheit"
|
|
204
|
+
|
|
205
|
+
# Remove None values
|
|
206
|
+
weather_data = {k: v for k, v in weather_data.items() if v is not None}
|
|
207
|
+
|
|
208
|
+
if len(weather_data) > 3: # Has more than just location, source, and units
|
|
209
|
+
return [weather_data]
|
|
210
|
+
|
|
211
|
+
return [{
|
|
212
|
+
"location": location,
|
|
213
|
+
"error": "Could not extract weather data from page"
|
|
214
|
+
}]
|
|
215
|
+
|
|
216
|
+
except Exception as e:
|
|
217
|
+
return [{
|
|
218
|
+
"location": location,
|
|
219
|
+
"error": f"Failed to parse weather data: {str(e)}"
|
|
220
|
+
}]
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Yandex search engine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from random import SystemRandom
|
|
7
|
+
from typing import Any, Optional
|
|
8
|
+
|
|
9
|
+
from ..base import BaseSearchEngine
|
|
10
|
+
from ..results import TextResult
|
|
11
|
+
|
|
12
|
+
random = SystemRandom()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Yandex(BaseSearchEngine[TextResult]):
    """Yandex text search engine (HTML-scraping backend)."""

    name = "yandex"
    category = "text"
    provider = "yandex"

    search_url = "https://yandex.com/search/"
    search_method = "GET"

    # XPath selectors used by the base engine to pull results out of the SERP.
    items_xpath = "//li[contains(@class, 'serp-item')]"
    elements_xpath: Mapping[str, str] = {
        "title": ".//h2//text()",
        "href": ".//h2/a/@href",
        "body": ".//div[contains(@class, 'text-container')]//text()",
    }

    def build_payload(
        self, query: str, region: str, safesearch: str, timelimit: str | None, page: int = 1, **kwargs: Any
    ) -> dict[str, Any]:
        """Build the query-string payload for a Yandex search request.

        Args:
            query: Search terms.
            region: Region code (accepted for interface compatibility;
                not sent to Yandex here).
            safesearch: One of "on", "moderate", "off" (case-insensitive);
                unknown values fall back to the family filter being off.
            timelimit: Unused; accepted for interface compatibility.
            page: 1-based page number; Yandex's "p" parameter is 0-based.
            **kwargs: Ignored extra options.

        Returns:
            Dict of query parameters for the GET request.
        """
        # Yandex "family" filter: "1" = strict, "0" = off/moderate.
        safesearch_base = {"on": "1", "moderate": "0", "off": "0"}
        payload = {
            "text": query,
            # .get() with a default keeps an unexpected safesearch value
            # from raising a bare KeyError deep inside the request path.
            "family": safesearch_base.get(safesearch.lower(), "0"),
        }
        if page > 1:
            payload["p"] = str(page - 1)
        return payload

    def run(self, *args, **kwargs) -> list[TextResult]:
        """Run text search on Yandex.

        Args:
            keywords: Search query.
            region: Region code.
            safesearch: Safe search level.
            max_results: Maximum number of results (truncation only —
                no extra pages are fetched).

        Returns:
            List of TextResult objects.
        """
        keywords = args[0] if args else kwargs.get("keywords")
        if keywords is None:
            keywords = ""
        region = args[1] if len(args) > 1 else kwargs.get("region", "us-en")
        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
        max_results = args[3] if len(args) > 3 else kwargs.get("max_results")

        results = self.search(query=keywords, region=region, safesearch=safesearch)
        if results and max_results:
            results = results[:max_results]
        return results or []
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Yep search engines package."""
|
|
2
|
+
|
|
3
|
+
from .base import YepBase
|
|
4
|
+
from .images import YepImages
|
|
5
|
+
from .suggestions import YepSuggestions
|
|
6
|
+
from .text import YepSearch as YepTextSearch
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"YepBase",
|
|
10
|
+
"YepTextSearch",
|
|
11
|
+
"YepImages",
|
|
12
|
+
"YepSuggestions",
|
|
13
|
+
]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from curl_cffi.requests import Session
|
|
4
|
+
|
|
5
|
+
from ....litagent import LitAgent
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class YepBase:
    """Base class for Yep search engines.

    Sets up a browser-impersonating HTTP session pointed at Yep's search
    API, with fingerprinted headers so requests resemble ordinary browser
    traffic.
    """

    def __init__(
        self,
        timeout: int = 20,
        proxies: dict[str, str] | None = None,
        verify: bool = True,
        impersonate: str = "chrome110",
    ):
        """Initialize the shared Yep HTTP session.

        Args:
            timeout: Request timeout in seconds.
            proxies: Optional mapping of scheme -> proxy URL.
            verify: Whether to verify TLS certificates.
            impersonate: curl_cffi browser fingerprint to impersonate.
        """
        self.base_url = "https://api.yep.com/fs/2/search"
        self.timeout = timeout
        # Local import keeps typing helpers out of the module namespace;
        # cast() quiets curl_cffi's strict parameter typing at zero runtime
        # cost. (The original also imported Optional here but never used it.)
        from typing import Any, cast
        self.session = Session(
            proxies=cast(Any, proxies),
            verify=verify,
            impersonate=cast(Any, impersonate),
            timeout=timeout,
        )
        self.session.headers.update(
            {
                **LitAgent().generate_fingerprint(),
                "Origin": "https://yep.com",
                "Referer": "https://yep.com/",
            }
        )
|
|
34
|
+
|