webscout 8.3.7__py3-none-any.whl → 2025.10.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIauto.py +250 -250
- webscout/AIbase.py +379 -379
- webscout/AIutel.py +60 -60
- webscout/Bard.py +1012 -1012
- webscout/Bing_search.py +417 -417
- webscout/DWEBS.py +529 -529
- webscout/Extra/Act.md +309 -309
- webscout/Extra/GitToolkit/__init__.py +10 -10
- webscout/Extra/GitToolkit/gitapi/README.md +110 -110
- webscout/Extra/GitToolkit/gitapi/__init__.py +11 -11
- webscout/Extra/GitToolkit/gitapi/repository.py +195 -195
- webscout/Extra/GitToolkit/gitapi/user.py +96 -96
- webscout/Extra/GitToolkit/gitapi/utils.py +61 -61
- webscout/Extra/YTToolkit/README.md +375 -375
- webscout/Extra/YTToolkit/YTdownloader.py +956 -956
- webscout/Extra/YTToolkit/__init__.py +2 -2
- webscout/Extra/YTToolkit/transcriber.py +475 -475
- webscout/Extra/YTToolkit/ytapi/README.md +44 -44
- webscout/Extra/YTToolkit/ytapi/__init__.py +6 -6
- webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
- webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
- webscout/Extra/YTToolkit/ytapi/extras.py +118 -118
- webscout/Extra/YTToolkit/ytapi/https.py +88 -88
- webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
- webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
- webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
- webscout/Extra/YTToolkit/ytapi/query.py +39 -39
- webscout/Extra/YTToolkit/ytapi/stream.py +62 -62
- webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
- webscout/Extra/YTToolkit/ytapi/video.py +232 -232
- webscout/Extra/autocoder/__init__.py +9 -9
- webscout/Extra/autocoder/autocoder.py +1105 -1105
- webscout/Extra/autocoder/autocoder_utiles.py +332 -332
- webscout/Extra/gguf.md +429 -429
- webscout/Extra/gguf.py +1213 -1213
- webscout/Extra/tempmail/README.md +487 -487
- webscout/Extra/tempmail/__init__.py +27 -27
- webscout/Extra/tempmail/async_utils.py +140 -140
- webscout/Extra/tempmail/base.py +160 -160
- webscout/Extra/tempmail/cli.py +186 -186
- webscout/Extra/tempmail/emailnator.py +84 -84
- webscout/Extra/tempmail/mail_tm.py +360 -360
- webscout/Extra/tempmail/temp_mail_io.py +291 -291
- webscout/Extra/weather.md +281 -281
- webscout/Extra/weather.py +193 -193
- webscout/Litlogger/README.md +10 -10
- webscout/Litlogger/__init__.py +15 -15
- webscout/Litlogger/formats.py +13 -13
- webscout/Litlogger/handlers.py +121 -121
- webscout/Litlogger/levels.py +13 -13
- webscout/Litlogger/logger.py +134 -134
- webscout/Provider/AISEARCH/Perplexity.py +332 -332
- webscout/Provider/AISEARCH/README.md +279 -279
- webscout/Provider/AISEARCH/__init__.py +16 -1
- webscout/Provider/AISEARCH/felo_search.py +206 -206
- webscout/Provider/AISEARCH/genspark_search.py +323 -323
- webscout/Provider/AISEARCH/hika_search.py +185 -185
- webscout/Provider/AISEARCH/iask_search.py +410 -410
- webscout/Provider/AISEARCH/monica_search.py +219 -219
- webscout/Provider/AISEARCH/scira_search.py +316 -316
- webscout/Provider/AISEARCH/stellar_search.py +177 -177
- webscout/Provider/AISEARCH/webpilotai_search.py +255 -255
- webscout/Provider/Aitopia.py +314 -314
- webscout/Provider/Andi.py +1 -1
- webscout/Provider/Apriel.py +306 -0
- webscout/Provider/ChatGPTClone.py +237 -236
- webscout/Provider/ChatSandbox.py +343 -343
- webscout/Provider/Cloudflare.py +324 -324
- webscout/Provider/Cohere.py +208 -208
- webscout/Provider/Deepinfra.py +370 -366
- webscout/Provider/ExaAI.py +260 -260
- webscout/Provider/ExaChat.py +308 -308
- webscout/Provider/Flowith.py +221 -221
- webscout/Provider/GMI.py +293 -0
- webscout/Provider/Gemini.py +164 -164
- webscout/Provider/GeminiProxy.py +167 -167
- webscout/Provider/GithubChat.py +371 -372
- webscout/Provider/Groq.py +800 -800
- webscout/Provider/HeckAI.py +383 -383
- webscout/Provider/Jadve.py +282 -282
- webscout/Provider/K2Think.py +307 -307
- webscout/Provider/Koboldai.py +205 -205
- webscout/Provider/LambdaChat.py +423 -423
- webscout/Provider/Nemotron.py +244 -244
- webscout/Provider/Netwrck.py +248 -248
- webscout/Provider/OLLAMA.py +395 -395
- webscout/Provider/OPENAI/Cloudflare.py +393 -393
- webscout/Provider/OPENAI/FalconH1.py +451 -451
- webscout/Provider/OPENAI/FreeGemini.py +296 -296
- webscout/Provider/OPENAI/K2Think.py +431 -431
- webscout/Provider/OPENAI/NEMOTRON.py +240 -240
- webscout/Provider/OPENAI/PI.py +427 -427
- webscout/Provider/OPENAI/README.md +959 -959
- webscout/Provider/OPENAI/TogetherAI.py +345 -345
- webscout/Provider/OPENAI/TwoAI.py +465 -465
- webscout/Provider/OPENAI/__init__.py +33 -18
- webscout/Provider/OPENAI/base.py +248 -248
- webscout/Provider/OPENAI/chatglm.py +528 -0
- webscout/Provider/OPENAI/chatgpt.py +592 -592
- webscout/Provider/OPENAI/chatgptclone.py +521 -521
- webscout/Provider/OPENAI/chatsandbox.py +202 -202
- webscout/Provider/OPENAI/deepinfra.py +318 -314
- webscout/Provider/OPENAI/e2b.py +1665 -1665
- webscout/Provider/OPENAI/exaai.py +420 -420
- webscout/Provider/OPENAI/exachat.py +452 -452
- webscout/Provider/OPENAI/friendli.py +232 -232
- webscout/Provider/OPENAI/{refact.py → gmi.py} +324 -274
- webscout/Provider/OPENAI/groq.py +364 -364
- webscout/Provider/OPENAI/heckai.py +314 -314
- webscout/Provider/OPENAI/llmchatco.py +337 -337
- webscout/Provider/OPENAI/netwrck.py +355 -355
- webscout/Provider/OPENAI/oivscode.py +290 -290
- webscout/Provider/OPENAI/opkfc.py +518 -518
- webscout/Provider/OPENAI/pydantic_imports.py +1 -1
- webscout/Provider/OPENAI/scirachat.py +535 -535
- webscout/Provider/OPENAI/sonus.py +308 -308
- webscout/Provider/OPENAI/standardinput.py +442 -442
- webscout/Provider/OPENAI/textpollinations.py +340 -340
- webscout/Provider/OPENAI/toolbaz.py +419 -416
- webscout/Provider/OPENAI/typefully.py +362 -362
- webscout/Provider/OPENAI/utils.py +295 -295
- webscout/Provider/OPENAI/venice.py +436 -436
- webscout/Provider/OPENAI/wisecat.py +387 -387
- webscout/Provider/OPENAI/writecream.py +166 -166
- webscout/Provider/OPENAI/x0gpt.py +378 -378
- webscout/Provider/OPENAI/yep.py +389 -389
- webscout/Provider/OpenGPT.py +230 -230
- webscout/Provider/Openai.py +243 -243
- webscout/Provider/PI.py +405 -405
- webscout/Provider/Perplexitylabs.py +430 -430
- webscout/Provider/QwenLM.py +272 -272
- webscout/Provider/STT/__init__.py +16 -1
- webscout/Provider/Sambanova.py +257 -257
- webscout/Provider/StandardInput.py +309 -309
- webscout/Provider/TTI/README.md +82 -82
- webscout/Provider/TTI/__init__.py +33 -18
- webscout/Provider/TTI/aiarta.py +413 -413
- webscout/Provider/TTI/base.py +136 -136
- webscout/Provider/TTI/bing.py +243 -243
- webscout/Provider/TTI/gpt1image.py +149 -149
- webscout/Provider/TTI/imagen.py +196 -196
- webscout/Provider/TTI/infip.py +211 -211
- webscout/Provider/TTI/magicstudio.py +232 -232
- webscout/Provider/TTI/monochat.py +219 -219
- webscout/Provider/TTI/piclumen.py +214 -214
- webscout/Provider/TTI/pixelmuse.py +232 -232
- webscout/Provider/TTI/pollinations.py +232 -232
- webscout/Provider/TTI/together.py +288 -288
- webscout/Provider/TTI/utils.py +12 -12
- webscout/Provider/TTI/venice.py +367 -367
- webscout/Provider/TTS/README.md +192 -192
- webscout/Provider/TTS/__init__.py +33 -18
- webscout/Provider/TTS/parler.py +110 -110
- webscout/Provider/TTS/streamElements.py +333 -333
- webscout/Provider/TTS/utils.py +280 -280
- webscout/Provider/TeachAnything.py +237 -237
- webscout/Provider/TextPollinationsAI.py +310 -310
- webscout/Provider/TogetherAI.py +356 -356
- webscout/Provider/TwoAI.py +312 -312
- webscout/Provider/TypliAI.py +311 -311
- webscout/Provider/UNFINISHED/ChatHub.py +208 -208
- webscout/Provider/UNFINISHED/ChutesAI.py +313 -313
- webscout/Provider/UNFINISHED/GizAI.py +294 -294
- webscout/Provider/UNFINISHED/Marcus.py +198 -198
- webscout/Provider/UNFINISHED/Qodo.py +477 -477
- webscout/Provider/UNFINISHED/VercelAIGateway.py +338 -338
- webscout/Provider/UNFINISHED/XenAI.py +324 -324
- webscout/Provider/UNFINISHED/Youchat.py +330 -330
- webscout/Provider/UNFINISHED/liner.py +334 -0
- webscout/Provider/UNFINISHED/liner_api_request.py +262 -262
- webscout/Provider/UNFINISHED/puterjs.py +634 -634
- webscout/Provider/UNFINISHED/samurai.py +223 -223
- webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
- webscout/Provider/Venice.py +250 -250
- webscout/Provider/VercelAI.py +256 -256
- webscout/Provider/WiseCat.py +231 -231
- webscout/Provider/WrDoChat.py +366 -366
- webscout/Provider/__init__.py +33 -18
- webscout/Provider/ai4chat.py +174 -174
- webscout/Provider/akashgpt.py +331 -331
- webscout/Provider/cerebras.py +446 -446
- webscout/Provider/chatglm.py +394 -301
- webscout/Provider/cleeai.py +211 -211
- webscout/Provider/elmo.py +282 -282
- webscout/Provider/geminiapi.py +208 -208
- webscout/Provider/granite.py +261 -261
- webscout/Provider/hermes.py +263 -263
- webscout/Provider/julius.py +223 -223
- webscout/Provider/learnfastai.py +309 -309
- webscout/Provider/llama3mitril.py +214 -214
- webscout/Provider/llmchat.py +243 -243
- webscout/Provider/llmchatco.py +290 -290
- webscout/Provider/meta.py +801 -801
- webscout/Provider/oivscode.py +309 -309
- webscout/Provider/scira_chat.py +383 -383
- webscout/Provider/searchchat.py +292 -292
- webscout/Provider/sonus.py +258 -258
- webscout/Provider/toolbaz.py +370 -367
- webscout/Provider/turboseek.py +273 -273
- webscout/Provider/typefully.py +207 -207
- webscout/Provider/yep.py +372 -372
- webscout/__init__.py +27 -31
- webscout/__main__.py +5 -5
- webscout/auth/api_key_manager.py +189 -189
- webscout/auth/config.py +175 -175
- webscout/auth/models.py +185 -185
- webscout/auth/routes.py +663 -664
- webscout/auth/simple_logger.py +236 -236
- webscout/cli.py +523 -523
- webscout/conversation.py +438 -438
- webscout/exceptions.py +361 -361
- webscout/litagent/Readme.md +298 -298
- webscout/litagent/__init__.py +28 -28
- webscout/litagent/agent.py +581 -581
- webscout/litagent/constants.py +59 -59
- webscout/litprinter/__init__.py +58 -58
- webscout/models.py +181 -181
- webscout/optimizers.py +419 -419
- webscout/prompt_manager.py +288 -288
- webscout/sanitize.py +1078 -1078
- webscout/scout/README.md +401 -401
- webscout/scout/__init__.py +8 -8
- webscout/scout/core/__init__.py +6 -6
- webscout/scout/core/crawler.py +297 -297
- webscout/scout/core/scout.py +706 -706
- webscout/scout/core/search_result.py +95 -95
- webscout/scout/core/text_analyzer.py +62 -62
- webscout/scout/core/text_utils.py +277 -277
- webscout/scout/core/web_analyzer.py +51 -51
- webscout/scout/element.py +599 -599
- webscout/scout/parsers/__init__.py +69 -69
- webscout/scout/parsers/html5lib_parser.py +172 -172
- webscout/scout/parsers/html_parser.py +236 -236
- webscout/scout/parsers/lxml_parser.py +178 -178
- webscout/scout/utils.py +37 -37
- webscout/search/__init__.py +51 -0
- webscout/search/base.py +195 -0
- webscout/search/duckduckgo_main.py +54 -0
- webscout/search/engines/__init__.py +48 -0
- webscout/search/engines/bing.py +84 -0
- webscout/search/engines/bing_news.py +52 -0
- webscout/search/engines/brave.py +43 -0
- webscout/search/engines/duckduckgo/__init__.py +25 -0
- webscout/search/engines/duckduckgo/answers.py +78 -0
- webscout/search/engines/duckduckgo/base.py +187 -0
- webscout/search/engines/duckduckgo/images.py +97 -0
- webscout/search/engines/duckduckgo/maps.py +168 -0
- webscout/search/engines/duckduckgo/news.py +68 -0
- webscout/search/engines/duckduckgo/suggestions.py +21 -0
- webscout/search/engines/duckduckgo/text.py +211 -0
- webscout/search/engines/duckduckgo/translate.py +47 -0
- webscout/search/engines/duckduckgo/videos.py +63 -0
- webscout/search/engines/duckduckgo/weather.py +74 -0
- webscout/search/engines/mojeek.py +37 -0
- webscout/search/engines/wikipedia.py +56 -0
- webscout/search/engines/yahoo.py +65 -0
- webscout/search/engines/yahoo_news.py +64 -0
- webscout/search/engines/yandex.py +43 -0
- webscout/search/engines/yep/__init__.py +13 -0
- webscout/search/engines/yep/base.py +32 -0
- webscout/search/engines/yep/images.py +99 -0
- webscout/search/engines/yep/suggestions.py +35 -0
- webscout/search/engines/yep/text.py +114 -0
- webscout/search/http_client.py +156 -0
- webscout/search/results.py +137 -0
- webscout/search/yep_main.py +44 -0
- webscout/swiftcli/Readme.md +323 -323
- webscout/swiftcli/__init__.py +95 -95
- webscout/swiftcli/core/__init__.py +7 -7
- webscout/swiftcli/core/cli.py +308 -308
- webscout/swiftcli/core/context.py +104 -104
- webscout/swiftcli/core/group.py +241 -241
- webscout/swiftcli/decorators/__init__.py +28 -28
- webscout/swiftcli/decorators/command.py +221 -221
- webscout/swiftcli/decorators/options.py +220 -220
- webscout/swiftcli/decorators/output.py +302 -302
- webscout/swiftcli/exceptions.py +21 -21
- webscout/swiftcli/plugins/__init__.py +9 -9
- webscout/swiftcli/plugins/base.py +135 -135
- webscout/swiftcli/plugins/manager.py +269 -269
- webscout/swiftcli/utils/__init__.py +59 -59
- webscout/swiftcli/utils/formatting.py +252 -252
- webscout/swiftcli/utils/parsing.py +267 -267
- webscout/update_checker.py +117 -117
- webscout/version.py +1 -1
- webscout/version.py.bak +2 -0
- webscout/zeroart/README.md +89 -89
- webscout/zeroart/__init__.py +134 -134
- webscout/zeroart/base.py +66 -66
- webscout/zeroart/effects.py +100 -100
- webscout/zeroart/fonts.py +1238 -1238
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/METADATA +936 -937
- webscout-2025.10.13.dist-info/RECORD +329 -0
- webscout/Provider/AISEARCH/DeepFind.py +0 -254
- webscout/Provider/OPENAI/Qwen3.py +0 -303
- webscout/Provider/OPENAI/qodo.py +0 -630
- webscout/Provider/OPENAI/xenai.py +0 -514
- webscout/tempid.py +0 -134
- webscout/webscout_search.py +0 -1183
- webscout/webscout_search_async.py +0 -649
- webscout/yep_search.py +0 -346
- webscout-8.3.7.dist-info/RECORD +0 -301
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/WHEEL +0 -0
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/entry_points.txt +0 -0
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/top_level.txt +0 -0
|
@@ -1,96 +1,96 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Scout Search Result Module
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from typing import List, Union, Callable, Any, Dict, Iterator
|
|
6
|
-
from ..element import Tag
|
|
7
|
-
from .text_analyzer import ScoutTextAnalyzer
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class ScoutSearchResult:
|
|
11
|
-
"""
|
|
12
|
-
Represents a search result with advanced querying capabilities.
|
|
13
|
-
Enhanced with more intelligent filtering and processing.
|
|
14
|
-
"""
|
|
15
|
-
def __init__(self, results: List[Tag]):
|
|
16
|
-
"""
|
|
17
|
-
Initialize a search result collection.
|
|
18
|
-
|
|
19
|
-
Args:
|
|
20
|
-
results (List[Tag]): List of matching tags
|
|
21
|
-
"""
|
|
22
|
-
self._results = results
|
|
23
|
-
|
|
24
|
-
def __len__(self) -> int:
|
|
25
|
-
return len(self._results)
|
|
26
|
-
|
|
27
|
-
def __iter__(self) -> Iterator[Tag]:
|
|
28
|
-
return iter(self._results)
|
|
29
|
-
|
|
30
|
-
def __getitem__(self, index: Union[int, slice]) -> Union[Tag, List[Tag]]:
|
|
31
|
-
return self._results[index]
|
|
32
|
-
|
|
33
|
-
def texts(self, separator=' ', strip=True) -> List[str]:
|
|
34
|
-
"""
|
|
35
|
-
Extract texts from all results.
|
|
36
|
-
|
|
37
|
-
Args:
|
|
38
|
-
separator (str, optional): Text separator
|
|
39
|
-
strip (bool, optional): Strip whitespace
|
|
40
|
-
|
|
41
|
-
Returns:
|
|
42
|
-
List[str]: List of extracted texts
|
|
43
|
-
"""
|
|
44
|
-
return [tag.get_text(separator, strip) for tag in self._results]
|
|
45
|
-
|
|
46
|
-
def attrs(self, attr_name: str) -> List[Any]:
|
|
47
|
-
"""
|
|
48
|
-
Extract a specific attribute from all results.
|
|
49
|
-
|
|
50
|
-
Args:
|
|
51
|
-
attr_name (str): Attribute name to extract
|
|
52
|
-
|
|
53
|
-
Returns:
|
|
54
|
-
List[Any]: List of attribute values
|
|
55
|
-
"""
|
|
56
|
-
return [tag.get(attr_name) for tag in self._results]
|
|
57
|
-
|
|
58
|
-
def filter(self, predicate: Callable[[Tag], bool]) -> 'ScoutSearchResult':
|
|
59
|
-
"""
|
|
60
|
-
Filter results using a predicate function.
|
|
61
|
-
|
|
62
|
-
Args:
|
|
63
|
-
predicate (Callable[[Tag], bool]): Filtering function
|
|
64
|
-
|
|
65
|
-
Returns:
|
|
66
|
-
ScoutSearchResult: Filtered search results
|
|
67
|
-
"""
|
|
68
|
-
return ScoutSearchResult([tag for tag in self._results if predicate(tag)])
|
|
69
|
-
|
|
70
|
-
def map(self, transform: Callable[[Tag], Any]) -> List[Any]:
|
|
71
|
-
"""
|
|
72
|
-
Transform results using a mapping function.
|
|
73
|
-
|
|
74
|
-
Args:
|
|
75
|
-
transform (Callable[[Tag], Any]): Transformation function
|
|
76
|
-
|
|
77
|
-
Returns:
|
|
78
|
-
List[Any]: Transformed results
|
|
79
|
-
"""
|
|
80
|
-
return [transform(tag) for tag in self._results]
|
|
81
|
-
|
|
82
|
-
def analyze_text(self) -> Dict[str, Any]:
|
|
83
|
-
"""
|
|
84
|
-
Perform text analysis on search results.
|
|
85
|
-
|
|
86
|
-
Returns:
|
|
87
|
-
Dict[str, Any]: Text analysis results
|
|
88
|
-
"""
|
|
89
|
-
texts = self.texts(strip=True)
|
|
90
|
-
full_text = ' '.join(texts)
|
|
91
|
-
|
|
92
|
-
return {
|
|
93
|
-
'total_results': len(self._results),
|
|
94
|
-
'word_count': ScoutTextAnalyzer.count_words(full_text),
|
|
95
|
-
'entities': ScoutTextAnalyzer.extract_entities(full_text)
|
|
1
|
+
"""
|
|
2
|
+
Scout Search Result Module
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Union, Callable, Any, Dict, Iterator
|
|
6
|
+
from ..element import Tag
|
|
7
|
+
from .text_analyzer import ScoutTextAnalyzer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ScoutSearchResult:
|
|
11
|
+
"""
|
|
12
|
+
Represents a search result with advanced querying capabilities.
|
|
13
|
+
Enhanced with more intelligent filtering and processing.
|
|
14
|
+
"""
|
|
15
|
+
def __init__(self, results: List[Tag]):
|
|
16
|
+
"""
|
|
17
|
+
Initialize a search result collection.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
results (List[Tag]): List of matching tags
|
|
21
|
+
"""
|
|
22
|
+
self._results = results
|
|
23
|
+
|
|
24
|
+
def __len__(self) -> int:
|
|
25
|
+
return len(self._results)
|
|
26
|
+
|
|
27
|
+
def __iter__(self) -> Iterator[Tag]:
|
|
28
|
+
return iter(self._results)
|
|
29
|
+
|
|
30
|
+
def __getitem__(self, index: Union[int, slice]) -> Union[Tag, List[Tag]]:
|
|
31
|
+
return self._results[index]
|
|
32
|
+
|
|
33
|
+
def texts(self, separator=' ', strip=True) -> List[str]:
|
|
34
|
+
"""
|
|
35
|
+
Extract texts from all results.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
separator (str, optional): Text separator
|
|
39
|
+
strip (bool, optional): Strip whitespace
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
List[str]: List of extracted texts
|
|
43
|
+
"""
|
|
44
|
+
return [tag.get_text(separator, strip) for tag in self._results]
|
|
45
|
+
|
|
46
|
+
def attrs(self, attr_name: str) -> List[Any]:
|
|
47
|
+
"""
|
|
48
|
+
Extract a specific attribute from all results.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
attr_name (str): Attribute name to extract
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
List[Any]: List of attribute values
|
|
55
|
+
"""
|
|
56
|
+
return [tag.get(attr_name) for tag in self._results]
|
|
57
|
+
|
|
58
|
+
def filter(self, predicate: Callable[[Tag], bool]) -> 'ScoutSearchResult':
|
|
59
|
+
"""
|
|
60
|
+
Filter results using a predicate function.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
predicate (Callable[[Tag], bool]): Filtering function
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
ScoutSearchResult: Filtered search results
|
|
67
|
+
"""
|
|
68
|
+
return ScoutSearchResult([tag for tag in self._results if predicate(tag)])
|
|
69
|
+
|
|
70
|
+
def map(self, transform: Callable[[Tag], Any]) -> List[Any]:
|
|
71
|
+
"""
|
|
72
|
+
Transform results using a mapping function.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
transform (Callable[[Tag], Any]): Transformation function
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
List[Any]: Transformed results
|
|
79
|
+
"""
|
|
80
|
+
return [transform(tag) for tag in self._results]
|
|
81
|
+
|
|
82
|
+
def analyze_text(self) -> Dict[str, Any]:
|
|
83
|
+
"""
|
|
84
|
+
Perform text analysis on search results.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
Dict[str, Any]: Text analysis results
|
|
88
|
+
"""
|
|
89
|
+
texts = self.texts(strip=True)
|
|
90
|
+
full_text = ' '.join(texts)
|
|
91
|
+
|
|
92
|
+
return {
|
|
93
|
+
'total_results': len(self._results),
|
|
94
|
+
'word_count': ScoutTextAnalyzer.count_words(full_text),
|
|
95
|
+
'entities': ScoutTextAnalyzer.extract_entities(full_text)
|
|
96
96
|
}
|
|
@@ -1,63 +1,63 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Scout Text Analyzer Module
|
|
3
|
-
"""
|
|
4
|
-
import re
|
|
5
|
-
from collections import Counter
|
|
6
|
-
from typing import List, Dict, Set
|
|
7
|
-
|
|
8
|
-
class ScoutTextAnalyzer:
|
|
9
|
-
"""
|
|
10
|
-
Advanced text analysis and processing utility.
|
|
11
|
-
"""
|
|
12
|
-
@staticmethod
|
|
13
|
-
def tokenize(text: str, lowercase=True, remove_punctuation=True) -> List[str]:
|
|
14
|
-
"""
|
|
15
|
-
Tokenize text into words.
|
|
16
|
-
|
|
17
|
-
Args:
|
|
18
|
-
text (str): Input text
|
|
19
|
-
lowercase (bool, optional): Convert to lowercase
|
|
20
|
-
remove_punctuation (bool, optional): Remove punctuation
|
|
21
|
-
|
|
22
|
-
Returns:
|
|
23
|
-
List[str]: List of tokens
|
|
24
|
-
"""
|
|
25
|
-
if lowercase:
|
|
26
|
-
text = text.lower()
|
|
27
|
-
|
|
28
|
-
if remove_punctuation:
|
|
29
|
-
text = re.sub(r'[^\w\s]', '', text)
|
|
30
|
-
|
|
31
|
-
return text.split()
|
|
32
|
-
|
|
33
|
-
@staticmethod
|
|
34
|
-
def count_words(text: str) -> Dict[str, int]:
|
|
35
|
-
"""
|
|
36
|
-
Count word frequencies.
|
|
37
|
-
|
|
38
|
-
Args:
|
|
39
|
-
text (str): Input text
|
|
40
|
-
|
|
41
|
-
Returns:
|
|
42
|
-
Dict[str, int]: Word frequency dictionary
|
|
43
|
-
"""
|
|
44
|
-
return dict(Counter(ScoutTextAnalyzer.tokenize(text)))
|
|
45
|
-
|
|
46
|
-
@staticmethod
|
|
47
|
-
def extract_entities(text: str) -> Dict[str, Set[str]]:
|
|
48
|
-
"""
|
|
49
|
-
Extract named entities from text.
|
|
50
|
-
|
|
51
|
-
Args:
|
|
52
|
-
text (str): Input text
|
|
53
|
-
|
|
54
|
-
Returns:
|
|
55
|
-
Dict[str, Set[str]]: Extracted entities
|
|
56
|
-
"""
|
|
57
|
-
entities = {
|
|
58
|
-
'emails': set(re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)),
|
|
59
|
-
'urls': set(re.findall(r'https?://\S+', text)),
|
|
60
|
-
'phones': set(re.findall(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', text)),
|
|
61
|
-
'dates': set(re.findall(r'\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b', text))
|
|
62
|
-
}
|
|
1
|
+
"""
|
|
2
|
+
Scout Text Analyzer Module
|
|
3
|
+
"""
|
|
4
|
+
import re
|
|
5
|
+
from collections import Counter
|
|
6
|
+
from typing import List, Dict, Set
|
|
7
|
+
|
|
8
|
+
class ScoutTextAnalyzer:
|
|
9
|
+
"""
|
|
10
|
+
Advanced text analysis and processing utility.
|
|
11
|
+
"""
|
|
12
|
+
@staticmethod
|
|
13
|
+
def tokenize(text: str, lowercase=True, remove_punctuation=True) -> List[str]:
|
|
14
|
+
"""
|
|
15
|
+
Tokenize text into words.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
text (str): Input text
|
|
19
|
+
lowercase (bool, optional): Convert to lowercase
|
|
20
|
+
remove_punctuation (bool, optional): Remove punctuation
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
List[str]: List of tokens
|
|
24
|
+
"""
|
|
25
|
+
if lowercase:
|
|
26
|
+
text = text.lower()
|
|
27
|
+
|
|
28
|
+
if remove_punctuation:
|
|
29
|
+
text = re.sub(r'[^\w\s]', '', text)
|
|
30
|
+
|
|
31
|
+
return text.split()
|
|
32
|
+
|
|
33
|
+
@staticmethod
|
|
34
|
+
def count_words(text: str) -> Dict[str, int]:
|
|
35
|
+
"""
|
|
36
|
+
Count word frequencies.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
text (str): Input text
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
Dict[str, int]: Word frequency dictionary
|
|
43
|
+
"""
|
|
44
|
+
return dict(Counter(ScoutTextAnalyzer.tokenize(text)))
|
|
45
|
+
|
|
46
|
+
@staticmethod
|
|
47
|
+
def extract_entities(text: str) -> Dict[str, Set[str]]:
|
|
48
|
+
"""
|
|
49
|
+
Extract named entities from text.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
text (str): Input text
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
Dict[str, Set[str]]: Extracted entities
|
|
56
|
+
"""
|
|
57
|
+
entities = {
|
|
58
|
+
'emails': set(re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)),
|
|
59
|
+
'urls': set(re.findall(r'https?://\S+', text)),
|
|
60
|
+
'phones': set(re.findall(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', text)),
|
|
61
|
+
'dates': set(re.findall(r'\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b', text))
|
|
62
|
+
}
|
|
63
63
|
return entities
|