webscout-8.3.7-py3-none-any.whl → webscout-2025.10.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic.

Files changed (306)
  1. webscout/AIauto.py +250 -250
  2. webscout/AIbase.py +379 -379
  3. webscout/AIutel.py +60 -60
  4. webscout/Bard.py +1012 -1012
  5. webscout/Bing_search.py +417 -417
  6. webscout/DWEBS.py +529 -529
  7. webscout/Extra/Act.md +309 -309
  8. webscout/Extra/GitToolkit/__init__.py +10 -10
  9. webscout/Extra/GitToolkit/gitapi/README.md +110 -110
  10. webscout/Extra/GitToolkit/gitapi/__init__.py +11 -11
  11. webscout/Extra/GitToolkit/gitapi/repository.py +195 -195
  12. webscout/Extra/GitToolkit/gitapi/user.py +96 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +61 -61
  14. webscout/Extra/YTToolkit/README.md +375 -375
  15. webscout/Extra/YTToolkit/YTdownloader.py +956 -956
  16. webscout/Extra/YTToolkit/__init__.py +2 -2
  17. webscout/Extra/YTToolkit/transcriber.py +475 -475
  18. webscout/Extra/YTToolkit/ytapi/README.md +44 -44
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +6 -6
  20. webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
  21. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  22. webscout/Extra/YTToolkit/ytapi/extras.py +118 -118
  23. webscout/Extra/YTToolkit/ytapi/https.py +88 -88
  24. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  25. webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
  26. webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
  27. webscout/Extra/YTToolkit/ytapi/query.py +39 -39
  28. webscout/Extra/YTToolkit/ytapi/stream.py +62 -62
  29. webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
  30. webscout/Extra/YTToolkit/ytapi/video.py +232 -232
  31. webscout/Extra/autocoder/__init__.py +9 -9
  32. webscout/Extra/autocoder/autocoder.py +1105 -1105
  33. webscout/Extra/autocoder/autocoder_utiles.py +332 -332
  34. webscout/Extra/gguf.md +429 -429
  35. webscout/Extra/gguf.py +1213 -1213
  36. webscout/Extra/tempmail/README.md +487 -487
  37. webscout/Extra/tempmail/__init__.py +27 -27
  38. webscout/Extra/tempmail/async_utils.py +140 -140
  39. webscout/Extra/tempmail/base.py +160 -160
  40. webscout/Extra/tempmail/cli.py +186 -186
  41. webscout/Extra/tempmail/emailnator.py +84 -84
  42. webscout/Extra/tempmail/mail_tm.py +360 -360
  43. webscout/Extra/tempmail/temp_mail_io.py +291 -291
  44. webscout/Extra/weather.md +281 -281
  45. webscout/Extra/weather.py +193 -193
  46. webscout/Litlogger/README.md +10 -10
  47. webscout/Litlogger/__init__.py +15 -15
  48. webscout/Litlogger/formats.py +13 -13
  49. webscout/Litlogger/handlers.py +121 -121
  50. webscout/Litlogger/levels.py +13 -13
  51. webscout/Litlogger/logger.py +134 -134
  52. webscout/Provider/AISEARCH/Perplexity.py +332 -332
  53. webscout/Provider/AISEARCH/README.md +279 -279
  54. webscout/Provider/AISEARCH/__init__.py +16 -1
  55. webscout/Provider/AISEARCH/felo_search.py +206 -206
  56. webscout/Provider/AISEARCH/genspark_search.py +323 -323
  57. webscout/Provider/AISEARCH/hika_search.py +185 -185
  58. webscout/Provider/AISEARCH/iask_search.py +410 -410
  59. webscout/Provider/AISEARCH/monica_search.py +219 -219
  60. webscout/Provider/AISEARCH/scira_search.py +316 -316
  61. webscout/Provider/AISEARCH/stellar_search.py +177 -177
  62. webscout/Provider/AISEARCH/webpilotai_search.py +255 -255
  63. webscout/Provider/Aitopia.py +314 -314
  64. webscout/Provider/Andi.py +1 -1
  65. webscout/Provider/Apriel.py +306 -0
  66. webscout/Provider/ChatGPTClone.py +237 -236
  67. webscout/Provider/ChatSandbox.py +343 -343
  68. webscout/Provider/Cloudflare.py +324 -324
  69. webscout/Provider/Cohere.py +208 -208
  70. webscout/Provider/Deepinfra.py +370 -366
  71. webscout/Provider/ExaAI.py +260 -260
  72. webscout/Provider/ExaChat.py +308 -308
  73. webscout/Provider/Flowith.py +221 -221
  74. webscout/Provider/GMI.py +293 -0
  75. webscout/Provider/Gemini.py +164 -164
  76. webscout/Provider/GeminiProxy.py +167 -167
  77. webscout/Provider/GithubChat.py +371 -372
  78. webscout/Provider/Groq.py +800 -800
  79. webscout/Provider/HeckAI.py +383 -383
  80. webscout/Provider/Jadve.py +282 -282
  81. webscout/Provider/K2Think.py +307 -307
  82. webscout/Provider/Koboldai.py +205 -205
  83. webscout/Provider/LambdaChat.py +423 -423
  84. webscout/Provider/Nemotron.py +244 -244
  85. webscout/Provider/Netwrck.py +248 -248
  86. webscout/Provider/OLLAMA.py +395 -395
  87. webscout/Provider/OPENAI/Cloudflare.py +393 -393
  88. webscout/Provider/OPENAI/FalconH1.py +451 -451
  89. webscout/Provider/OPENAI/FreeGemini.py +296 -296
  90. webscout/Provider/OPENAI/K2Think.py +431 -431
  91. webscout/Provider/OPENAI/NEMOTRON.py +240 -240
  92. webscout/Provider/OPENAI/PI.py +427 -427
  93. webscout/Provider/OPENAI/README.md +959 -959
  94. webscout/Provider/OPENAI/TogetherAI.py +345 -345
  95. webscout/Provider/OPENAI/TwoAI.py +465 -465
  96. webscout/Provider/OPENAI/__init__.py +33 -18
  97. webscout/Provider/OPENAI/base.py +248 -248
  98. webscout/Provider/OPENAI/chatglm.py +528 -0
  99. webscout/Provider/OPENAI/chatgpt.py +592 -592
  100. webscout/Provider/OPENAI/chatgptclone.py +521 -521
  101. webscout/Provider/OPENAI/chatsandbox.py +202 -202
  102. webscout/Provider/OPENAI/deepinfra.py +318 -314
  103. webscout/Provider/OPENAI/e2b.py +1665 -1665
  104. webscout/Provider/OPENAI/exaai.py +420 -420
  105. webscout/Provider/OPENAI/exachat.py +452 -452
  106. webscout/Provider/OPENAI/friendli.py +232 -232
  107. webscout/Provider/OPENAI/{refact.py → gmi.py} +324 -274
  108. webscout/Provider/OPENAI/groq.py +364 -364
  109. webscout/Provider/OPENAI/heckai.py +314 -314
  110. webscout/Provider/OPENAI/llmchatco.py +337 -337
  111. webscout/Provider/OPENAI/netwrck.py +355 -355
  112. webscout/Provider/OPENAI/oivscode.py +290 -290
  113. webscout/Provider/OPENAI/opkfc.py +518 -518
  114. webscout/Provider/OPENAI/pydantic_imports.py +1 -1
  115. webscout/Provider/OPENAI/scirachat.py +535 -535
  116. webscout/Provider/OPENAI/sonus.py +308 -308
  117. webscout/Provider/OPENAI/standardinput.py +442 -442
  118. webscout/Provider/OPENAI/textpollinations.py +340 -340
  119. webscout/Provider/OPENAI/toolbaz.py +419 -416
  120. webscout/Provider/OPENAI/typefully.py +362 -362
  121. webscout/Provider/OPENAI/utils.py +295 -295
  122. webscout/Provider/OPENAI/venice.py +436 -436
  123. webscout/Provider/OPENAI/wisecat.py +387 -387
  124. webscout/Provider/OPENAI/writecream.py +166 -166
  125. webscout/Provider/OPENAI/x0gpt.py +378 -378
  126. webscout/Provider/OPENAI/yep.py +389 -389
  127. webscout/Provider/OpenGPT.py +230 -230
  128. webscout/Provider/Openai.py +243 -243
  129. webscout/Provider/PI.py +405 -405
  130. webscout/Provider/Perplexitylabs.py +430 -430
  131. webscout/Provider/QwenLM.py +272 -272
  132. webscout/Provider/STT/__init__.py +16 -1
  133. webscout/Provider/Sambanova.py +257 -257
  134. webscout/Provider/StandardInput.py +309 -309
  135. webscout/Provider/TTI/README.md +82 -82
  136. webscout/Provider/TTI/__init__.py +33 -18
  137. webscout/Provider/TTI/aiarta.py +413 -413
  138. webscout/Provider/TTI/base.py +136 -136
  139. webscout/Provider/TTI/bing.py +243 -243
  140. webscout/Provider/TTI/gpt1image.py +149 -149
  141. webscout/Provider/TTI/imagen.py +196 -196
  142. webscout/Provider/TTI/infip.py +211 -211
  143. webscout/Provider/TTI/magicstudio.py +232 -232
  144. webscout/Provider/TTI/monochat.py +219 -219
  145. webscout/Provider/TTI/piclumen.py +214 -214
  146. webscout/Provider/TTI/pixelmuse.py +232 -232
  147. webscout/Provider/TTI/pollinations.py +232 -232
  148. webscout/Provider/TTI/together.py +288 -288
  149. webscout/Provider/TTI/utils.py +12 -12
  150. webscout/Provider/TTI/venice.py +367 -367
  151. webscout/Provider/TTS/README.md +192 -192
  152. webscout/Provider/TTS/__init__.py +33 -18
  153. webscout/Provider/TTS/parler.py +110 -110
  154. webscout/Provider/TTS/streamElements.py +333 -333
  155. webscout/Provider/TTS/utils.py +280 -280
  156. webscout/Provider/TeachAnything.py +237 -237
  157. webscout/Provider/TextPollinationsAI.py +310 -310
  158. webscout/Provider/TogetherAI.py +356 -356
  159. webscout/Provider/TwoAI.py +312 -312
  160. webscout/Provider/TypliAI.py +311 -311
  161. webscout/Provider/UNFINISHED/ChatHub.py +208 -208
  162. webscout/Provider/UNFINISHED/ChutesAI.py +313 -313
  163. webscout/Provider/UNFINISHED/GizAI.py +294 -294
  164. webscout/Provider/UNFINISHED/Marcus.py +198 -198
  165. webscout/Provider/UNFINISHED/Qodo.py +477 -477
  166. webscout/Provider/UNFINISHED/VercelAIGateway.py +338 -338
  167. webscout/Provider/UNFINISHED/XenAI.py +324 -324
  168. webscout/Provider/UNFINISHED/Youchat.py +330 -330
  169. webscout/Provider/UNFINISHED/liner.py +334 -0
  170. webscout/Provider/UNFINISHED/liner_api_request.py +262 -262
  171. webscout/Provider/UNFINISHED/puterjs.py +634 -634
  172. webscout/Provider/UNFINISHED/samurai.py +223 -223
  173. webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
  174. webscout/Provider/Venice.py +250 -250
  175. webscout/Provider/VercelAI.py +256 -256
  176. webscout/Provider/WiseCat.py +231 -231
  177. webscout/Provider/WrDoChat.py +366 -366
  178. webscout/Provider/__init__.py +33 -18
  179. webscout/Provider/ai4chat.py +174 -174
  180. webscout/Provider/akashgpt.py +331 -331
  181. webscout/Provider/cerebras.py +446 -446
  182. webscout/Provider/chatglm.py +394 -301
  183. webscout/Provider/cleeai.py +211 -211
  184. webscout/Provider/elmo.py +282 -282
  185. webscout/Provider/geminiapi.py +208 -208
  186. webscout/Provider/granite.py +261 -261
  187. webscout/Provider/hermes.py +263 -263
  188. webscout/Provider/julius.py +223 -223
  189. webscout/Provider/learnfastai.py +309 -309
  190. webscout/Provider/llama3mitril.py +214 -214
  191. webscout/Provider/llmchat.py +243 -243
  192. webscout/Provider/llmchatco.py +290 -290
  193. webscout/Provider/meta.py +801 -801
  194. webscout/Provider/oivscode.py +309 -309
  195. webscout/Provider/scira_chat.py +383 -383
  196. webscout/Provider/searchchat.py +292 -292
  197. webscout/Provider/sonus.py +258 -258
  198. webscout/Provider/toolbaz.py +370 -367
  199. webscout/Provider/turboseek.py +273 -273
  200. webscout/Provider/typefully.py +207 -207
  201. webscout/Provider/yep.py +372 -372
  202. webscout/__init__.py +27 -31
  203. webscout/__main__.py +5 -5
  204. webscout/auth/api_key_manager.py +189 -189
  205. webscout/auth/config.py +175 -175
  206. webscout/auth/models.py +185 -185
  207. webscout/auth/routes.py +663 -664
  208. webscout/auth/simple_logger.py +236 -236
  209. webscout/cli.py +523 -523
  210. webscout/conversation.py +438 -438
  211. webscout/exceptions.py +361 -361
  212. webscout/litagent/Readme.md +298 -298
  213. webscout/litagent/__init__.py +28 -28
  214. webscout/litagent/agent.py +581 -581
  215. webscout/litagent/constants.py +59 -59
  216. webscout/litprinter/__init__.py +58 -58
  217. webscout/models.py +181 -181
  218. webscout/optimizers.py +419 -419
  219. webscout/prompt_manager.py +288 -288
  220. webscout/sanitize.py +1078 -1078
  221. webscout/scout/README.md +401 -401
  222. webscout/scout/__init__.py +8 -8
  223. webscout/scout/core/__init__.py +6 -6
  224. webscout/scout/core/crawler.py +297 -297
  225. webscout/scout/core/scout.py +706 -706
  226. webscout/scout/core/search_result.py +95 -95
  227. webscout/scout/core/text_analyzer.py +62 -62
  228. webscout/scout/core/text_utils.py +277 -277
  229. webscout/scout/core/web_analyzer.py +51 -51
  230. webscout/scout/element.py +599 -599
  231. webscout/scout/parsers/__init__.py +69 -69
  232. webscout/scout/parsers/html5lib_parser.py +172 -172
  233. webscout/scout/parsers/html_parser.py +236 -236
  234. webscout/scout/parsers/lxml_parser.py +178 -178
  235. webscout/scout/utils.py +37 -37
  236. webscout/search/__init__.py +51 -0
  237. webscout/search/base.py +195 -0
  238. webscout/search/duckduckgo_main.py +54 -0
  239. webscout/search/engines/__init__.py +48 -0
  240. webscout/search/engines/bing.py +84 -0
  241. webscout/search/engines/bing_news.py +52 -0
  242. webscout/search/engines/brave.py +43 -0
  243. webscout/search/engines/duckduckgo/__init__.py +25 -0
  244. webscout/search/engines/duckduckgo/answers.py +78 -0
  245. webscout/search/engines/duckduckgo/base.py +187 -0
  246. webscout/search/engines/duckduckgo/images.py +97 -0
  247. webscout/search/engines/duckduckgo/maps.py +168 -0
  248. webscout/search/engines/duckduckgo/news.py +68 -0
  249. webscout/search/engines/duckduckgo/suggestions.py +21 -0
  250. webscout/search/engines/duckduckgo/text.py +211 -0
  251. webscout/search/engines/duckduckgo/translate.py +47 -0
  252. webscout/search/engines/duckduckgo/videos.py +63 -0
  253. webscout/search/engines/duckduckgo/weather.py +74 -0
  254. webscout/search/engines/mojeek.py +37 -0
  255. webscout/search/engines/wikipedia.py +56 -0
  256. webscout/search/engines/yahoo.py +65 -0
  257. webscout/search/engines/yahoo_news.py +64 -0
  258. webscout/search/engines/yandex.py +43 -0
  259. webscout/search/engines/yep/__init__.py +13 -0
  260. webscout/search/engines/yep/base.py +32 -0
  261. webscout/search/engines/yep/images.py +99 -0
  262. webscout/search/engines/yep/suggestions.py +35 -0
  263. webscout/search/engines/yep/text.py +114 -0
  264. webscout/search/http_client.py +156 -0
  265. webscout/search/results.py +137 -0
  266. webscout/search/yep_main.py +44 -0
  267. webscout/swiftcli/Readme.md +323 -323
  268. webscout/swiftcli/__init__.py +95 -95
  269. webscout/swiftcli/core/__init__.py +7 -7
  270. webscout/swiftcli/core/cli.py +308 -308
  271. webscout/swiftcli/core/context.py +104 -104
  272. webscout/swiftcli/core/group.py +241 -241
  273. webscout/swiftcli/decorators/__init__.py +28 -28
  274. webscout/swiftcli/decorators/command.py +221 -221
  275. webscout/swiftcli/decorators/options.py +220 -220
  276. webscout/swiftcli/decorators/output.py +302 -302
  277. webscout/swiftcli/exceptions.py +21 -21
  278. webscout/swiftcli/plugins/__init__.py +9 -9
  279. webscout/swiftcli/plugins/base.py +135 -135
  280. webscout/swiftcli/plugins/manager.py +269 -269
  281. webscout/swiftcli/utils/__init__.py +59 -59
  282. webscout/swiftcli/utils/formatting.py +252 -252
  283. webscout/swiftcli/utils/parsing.py +267 -267
  284. webscout/update_checker.py +117 -117
  285. webscout/version.py +1 -1
  286. webscout/version.py.bak +2 -0
  287. webscout/zeroart/README.md +89 -89
  288. webscout/zeroart/__init__.py +134 -134
  289. webscout/zeroart/base.py +66 -66
  290. webscout/zeroart/effects.py +100 -100
  291. webscout/zeroart/fonts.py +1238 -1238
  292. {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/METADATA +936 -937
  293. webscout-2025.10.13.dist-info/RECORD +329 -0
  294. webscout/Provider/AISEARCH/DeepFind.py +0 -254
  295. webscout/Provider/OPENAI/Qwen3.py +0 -303
  296. webscout/Provider/OPENAI/qodo.py +0 -630
  297. webscout/Provider/OPENAI/xenai.py +0 -514
  298. webscout/tempid.py +0 -134
  299. webscout/webscout_search.py +0 -1183
  300. webscout/webscout_search_async.py +0 -649
  301. webscout/yep_search.py +0 -346
  302. webscout-8.3.7.dist-info/RECORD +0 -301
  303. {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/WHEEL +0 -0
  304. {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/entry_points.txt +0 -0
  305. {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/licenses/LICENSE.md +0 -0
  306. {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/top_level.txt +0 -0
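Worth noting in the list above: the monolithic webscout/webscout_search.py, webscout/webscout_search_async.py, and webscout/yep_search.py are removed, while a new webscout/search/ package, split per engine under webscout/search/engines/, is added. The file list alone does not reveal the new public API, so the sketch below only probes module paths taken verbatim from the diff; no class or function names are assumed.

# Probe the new search package layout added in 2025.10.13.
# Module paths are taken directly from the file list above; nothing
# else about the API is assumed.
import importlib

for name in [
    "webscout.search",
    "webscout.search.engines.duckduckgo.text",
    "webscout.search.engines.bing",
    "webscout.search.engines.yep.text",
]:
    mod = importlib.import_module(name)
    # Listing public names is a safe way to discover the actual exports.
    public = [n for n in dir(mod) if not n.startswith("_")]
    print(name, "->", public[:8])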
webscout/DWEBS.py CHANGED
@@ -1,529 +1,529 @@ (the old and new sides of this hunk are line-for-line identical; the file was re-emitted without content changes, so its content appears once below)

"""
DWEBS - A Google search library with advanced features
"""
import random
from time import sleep

from webscout.scout import Scout

# Import trio before curl_cffi to prevent eventlet socket monkey-patching conflicts
# See: https://github.com/python-trio/trio/issues/3015
try:
    import trio  # noqa: F401
except ImportError:
    pass  # trio is optional, ignore if not available
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, List, Optional
from urllib.parse import unquote, urlencode

from curl_cffi.requests import Session


class SearchResult:
    """Class to represent a search result with metadata."""

    def __init__(self, url: str, title: str, description: str):
        """
        Initialize a search result.

        Args:
            url: The URL of the search result
            title: The title of the search result
            description: The description/snippet of the search result
        """
        self.url = url
        self.title = title
        self.description = description
        # Additional metadata that can be populated
        self.metadata: Dict[str, Any] = {}

    def __repr__(self) -> str:
        """Return string representation of search result."""
        return f"SearchResult(url={self.url}, title={self.title}, description={self.description})"


class GoogleSearch:
    """Google search implementation with configurable parameters and advanced features."""

    _executor: ThreadPoolExecutor = ThreadPoolExecutor()

    def __init__(
        self,
        timeout: int = 10,
        proxies: Optional[Dict[str, str]] = None,
        verify: bool = True,
        lang: str = "en",
        sleep_interval: float = 0.0,
        impersonate: str = "chrome110"
    ):
        """
        Initialize GoogleSearch with custom settings.

        Args:
            timeout: Request timeout in seconds
            proxies: Proxy configuration for requests
            verify: Whether to verify SSL certificates
            lang: Search language
            sleep_interval: Sleep time between pagination requests
            impersonate: Browser profile for curl_cffi. Defaults to "chrome110".
        """
        self.timeout = timeout  # Keep timeout for potential non-session uses or reference
        self.proxies = proxies if proxies else {}
        self.verify = verify
        self.lang = lang
        self.sleep_interval = sleep_interval
        self.base_url = "https://www.google.com/search"
        # Initialize curl_cffi session
        self.session = Session(
            proxies=self.proxies,
            verify=self.verify,
            timeout=self.timeout,
            impersonate=impersonate
        )
        # Set common headers for the session
        self.session.headers = {
            "User-Agent": self._get_useragent(),
            "Accept-Language": self.lang,
            "Accept-Encoding": "gzip, deflate, br",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        }
        # Set default cookies for the session
        self.session.cookies.update({
            'CONSENT': 'PENDING+987',
            'SOCS': 'CAESHAgBEhIaAB',
        })

    def _get_useragent(self) -> str:
        """
        Generate a random user agent string.

        Returns:
            Random user agent string
        """
        lynx_version = f"Lynx/{random.randint(2, 3)}.{random.randint(8, 9)}.{random.randint(0, 2)}"
        libwww_version = f"libwww-FM/{random.randint(2, 3)}.{random.randint(13, 15)}"
        ssl_mm_version = f"SSL-MM/{random.randint(1, 2)}.{random.randint(3, 5)}"
        openssl_version = f"OpenSSL/{random.randint(1, 3)}.{random.randint(0, 4)}.{random.randint(0, 9)}"
        return f"{lynx_version} {libwww_version} {ssl_mm_version} {openssl_version}"

    def _make_request(self, term: str, results: int, start: int = 0, search_type: str = None) -> str:
        """
        Make a request to Google search.

        Args:
            term: Search query
            results: Number of results to request
            start: Start position for pagination
            search_type: Type of search ('', 'nws', 'isch')

        Returns:
            HTML response content
        """
        params = {
            "q": term,
            "num": results + 2,  # Request slightly more than needed
            "hl": self.lang,
            "start": start,
        }

        # Add search type if specified
        if search_type:
            params["tbm"] = search_type

        try:
            # Use the curl_cffi session
            resp = self.session.get(
                url=self.base_url,
                params=params,
                # Headers and cookies are now part of the session
                # proxies, timeout, verify are handled by the session
            )
            resp.raise_for_status()
            return resp.text
        except Exception as e:
            # Provide more specific error context if possible
            if hasattr(e, 'response') and e.response is not None:
                raise RuntimeError(f"Search request failed with status {e.response.status_code}: {str(e)}")
            else:
                raise RuntimeError(f"Search request failed: {str(e)}")

    def _extract_url(self, raw_link: str) -> Optional[str]:
        """
        Extract actual URL from Google redirect URL.

        Args:
            raw_link: Raw link from Google search

        Returns:
            Actual URL or None if invalid
        """
        if not raw_link:
            return None

        if raw_link.startswith("/url?"):
            try:
                link = unquote(raw_link.split("&")[0].replace("/url?q=", ""))
                return link
            except Exception:
                return None
        elif raw_link.startswith("http"):
            return unquote(raw_link)

        return None

    def _is_valid_result(self, link: str, fetched_links: set, unique: bool) -> bool:
        """
        Check if search result is valid.

        Args:
            link: URL to check
            fetched_links: Set of already fetched links
            unique: Whether to filter duplicate links

        Returns:
            Boolean indicating if result is valid
        """
        if any(x in link for x in ["google.", "/search?", "webcache."]):
            return False

        if link in fetched_links and unique:
            return False

        return True

    def _parse_search_results(
        self,
        html: str,
        num_results: int,
        fetched_links: set,
        unique: bool
    ) -> List[SearchResult]:
        """
        Parse search results from HTML.

        Args:
            html: HTML content to parse
            num_results: Maximum number of results to return
            fetched_links: Set of already fetched links
            unique: Filter duplicate links

        Returns:
            List of SearchResult objects
        """
        results = []
        soup = Scout(html, features="html.parser")
        result_blocks = soup.find_all("div", class_="ezO2md")

        if not result_blocks:
            # Try alternative class patterns if the main one doesn't match
            result_blocks = soup.find_all("div", attrs={"class": lambda c: c and "g" in c.split()})

        for result in result_blocks:
            # Find the link - looking for various potential Google result classes
            link_tag = result.find("a", class_=["fuLhoc", "ZWRArf"])
            if not link_tag:
                link_tag = result.find("a")
            if not link_tag:
                continue

            raw_link = link_tag.get("href", "")
            link = self._extract_url(raw_link)

            if not link:
                continue

            if not self._is_valid_result(link, fetched_links, unique):
                continue

            # Get title - it's the text content of the link tag for these results
            title = link_tag.get_text(strip=True)
            if not title:
                continue

            # Get description - it's in a span with class FrIlee or potentially other classes
            description_tag = result.find("span", class_="FrIlee")
            if not description_tag:
                description_tag = result.find(["div", "span"], class_=lambda c: c and any(x in c for x in ["snippet", "description", "VwiC3b"]))

            description = description_tag.get_text(strip=True) if description_tag else ""

            # Create result object
            search_result = SearchResult(link, title, description)

            # Add extra metadata if available
            citation = result.find("cite")
            if citation:
                search_result.metadata["source"] = citation.get_text(strip=True)

            timestamp = result.find("span", class_=lambda c: c and "ZE5qJf" in c)
            if timestamp:
                search_result.metadata["date"] = timestamp.get_text(strip=True)

            fetched_links.add(link)
            results.append(search_result)

            if len(results) >= num_results:
                break

        return results

    def text(
        self,
        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10,
        start_num: int = 0,
        unique: bool = True
    ) -> List[SearchResult]:
        """
        Search Google for web results.

        Args:
            keywords: Search query
            region: Region for search results (ISO country code)
            safesearch: SafeSearch setting ("on", "moderate", "off")
            max_results: Maximum number of results to return
            start_num: Starting position for pagination
            unique: Filter duplicate results

        Returns:
            List of SearchResult objects with search results
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")

        # Map safesearch values to Google's safe parameter
        safe_map = {
            "on": "active",
            "moderate": "moderate",
            "off": "off"
        }
        safe = safe_map.get(safesearch.lower(), "moderate")

        # Keep track of unique results
        fetched_results = []
        fetched_links = set()
        start = start_num

        while len(fetched_results) < max_results:
            # Add safe search parameter to the request
            # Note: This modifies the session params for this specific request type
            # It might be better to pass params directly to session.get if mixing search types
            term_with_safe = f"{keywords} safe:{safe}"
            if region and region.lower() != "all":
                term_with_safe += f" location:{region}"  # Example of adding region, adjust as needed

            response_html = self._make_request(
                term=term_with_safe,  # Pass term with safe search
                results=max_results - len(fetched_results),
                start=start
            )

            results = self._parse_search_results(
                html=response_html,
                num_results=max_results - len(fetched_results),
                fetched_links=fetched_links,
                unique=unique
            )

            if not results:
                break

            fetched_results.extend(results)

            if len(fetched_results) >= max_results:
                break

            start += 10  # Google typically uses increments of 10
            sleep(self.sleep_interval)

        return fetched_results[:max_results]

    def news(
        self,
        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10
    ) -> List[SearchResult]:
        """
        Search Google News for news results.

        Args:
            keywords: Search query
            region: Region for search results (ISO country code)
            safesearch: SafeSearch setting ("on", "moderate", "off")
            max_results: Maximum number of results to return

        Returns:
            List of SearchResult objects with news results
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")

        # Map safesearch values to Google's safe parameter
        safe_map = {
            "on": "active",
            "moderate": "moderate",
            "off": "off"
        }
        safe = safe_map.get(safesearch.lower(), "moderate")

        # Keep track of unique results
        fetched_links = set()

        # Add safe search parameter
        term_with_safe = f"{keywords} safe:{safe}"
        if region and region.lower() != "all":
            term_with_safe += f" location:{region}"  # Example

        response_html = self._make_request(
            term=term_with_safe,  # Pass term with safe search
            results=max_results,
            search_type="nws"
        )

        results = self._parse_search_results(
            html=response_html,
            num_results=max_results,
            fetched_links=fetched_links,
            unique=True  # News results are generally unique per request
        )

        return results[:max_results]

    def suggestions(self, query: str, region: str = None) -> List[str]:
        """
        Get search suggestions for a query term.

        Args:
            query: Search query
            region: Region for suggestions (ISO country code)

        Returns:
            List of search suggestions
        """
        if not query:
            raise ValueError("Search query cannot be empty")

        try:
            params = {
                "client": "firefox",
                "q": query,
            }

            # Add region if specified
            if region and region.lower() != "all":
                params["gl"] = region

            url = f"https://www.google.com/complete/search?{urlencode(params)}"

            # Use a simpler header set for the suggestions API
            headers = {
                "User-Agent": self._get_useragent(),
                "Accept": "application/json, text/javascript, */*",
                "Accept-Language": self.lang,
            }

            # Use session.get but override headers for this specific request
            response = self.session.get(
                url=url,
                headers=headers,
                params=params  # Pass params directly
                # timeout and verify are handled by session
            )
            response.raise_for_status()

            # Response format is typically: ["original query", ["suggestion1", "suggestion2", ...]]
            data = response.json()
            if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
                return data[1]
            return []

        except Exception as e:
            # Provide more specific error context if possible
            if hasattr(e, 'response') and e.response is not None:
                # Log error or handle differently if needed
                print(f"Suggestions request failed with status {e.response.status_code}: {str(e)}")
            else:
                print(f"Suggestions request failed: {str(e)}")
            # Return empty list on error instead of raising exception
            return []


# Legacy function support for backward compatibility
def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_interval=0, timeout=5, safe="active", ssl_verify=True, region=None, start_num=0, unique=False, impersonate="chrome110"):  # Added impersonate
    """Legacy function for backward compatibility."""
    google_search = GoogleSearch(
        timeout=timeout,
        proxies={"https": proxy, "http": proxy} if proxy else None,
        verify=ssl_verify,
        lang=lang,
        sleep_interval=sleep_interval,
        impersonate=impersonate  # Pass impersonate
    )

    # Map legacy safe values
    safe_search_map = {
        "active": "on",
        "moderate": "moderate",
        "off": "off"
    }
    safesearch_val = safe_search_map.get(safe, "moderate")

    results = google_search.text(
        keywords=term,
        region=region,
        safesearch=safesearch_val,
        max_results=num_results,
        start_num=start_num,
        unique=unique
    )

    # Convert to simple URLs if not advanced mode
    if not advanced:
        return [result.url for result in results]
    return results


if __name__ == "__main__":
    from rich import print
    google = GoogleSearch(
        timeout=10,  # Optional: Set custom timeout
        proxies=None,  # Optional: Use proxies
        verify=True  # Optional: SSL verification
    )

    # Text Search
    print("TEXT SEARCH RESULTS:")
    text_results = google.text(
        keywords="Python programming",
        region="us",  # Optional: Region for results
        safesearch="moderate",  # Optional: "on", "moderate", "off"
        max_results=3  # Optional: Limit number of results
    )
    for result in text_results:
        print(f"Title: {result.title}")
        print(f"URL: {result.url}")
        print(f"Description: {result.description}")
        print("---")

    # News Search
    print("\nNEWS SEARCH RESULTS:")
    news_results = google.news(
        keywords="artificial intelligence",
        region="us",
        safesearch="moderate",
        max_results=2
    )
    for result in news_results:
        print(f"Title: {result.title}")
        print(f"URL: {result.url}")
        print(f"Description: {result.description}")
        print("---")

    # Search Suggestions
    print("\nSEARCH SUGGESTIONS:")
    suggestions = google.suggestions("how to")
    print(suggestions)
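Since DWEBS.py is carried over unchanged, its legacy entry point still works as before. A minimal usage sketch, taken straight from the search() signature and the GoogleSearch class above (whether live results come back depends on network access and on Google's current result markup):

# Legacy helper: returns bare URL strings unless advanced=True,
# in which case it returns SearchResult objects.
from webscout.DWEBS import GoogleSearch, search

urls = search("python packaging", num_results=5, advanced=False)
for url in urls:
    print(url)

# Equivalent call through the class-based API.
results = GoogleSearch(timeout=10).text(keywords="python packaging", max_results=5)
for r in results:
    print(r.title, "->", r.url)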