webscout-8.2.2-py3-none-any.whl → webscout-8.2.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (306)
  1. webscout/AIauto.py +112 -22
  2. webscout/AIbase.py +144 -7
  3. webscout/AIutel.py +249 -131
  4. webscout/Bard.py +579 -206
  5. webscout/DWEBS.py +78 -35
  6. webscout/__init__.py +0 -1
  7. webscout/cli.py +256 -0
  8. webscout/conversation.py +307 -436
  9. webscout/exceptions.py +23 -0
  10. webscout/prompt_manager.py +56 -42
  11. webscout/version.py +1 -1
  12. webscout/webscout_search.py +65 -47
  13. webscout/webscout_search_async.py +81 -126
  14. webscout/yep_search.py +93 -43
  15. {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info}/METADATA +172 -52
  16. webscout-8.2.7.dist-info/RECORD +26 -0
  17. {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info}/WHEEL +1 -1
  18. webscout-8.2.7.dist-info/entry_points.txt +3 -0
  19. webscout-8.2.7.dist-info/top_level.txt +1 -0
  20. inferno/__init__.py +0 -6
  21. inferno/__main__.py +0 -9
  22. inferno/cli.py +0 -6
  23. webscout/Extra/GitToolkit/__init__.py +0 -10
  24. webscout/Extra/GitToolkit/gitapi/__init__.py +0 -12
  25. webscout/Extra/GitToolkit/gitapi/repository.py +0 -195
  26. webscout/Extra/GitToolkit/gitapi/user.py +0 -96
  27. webscout/Extra/GitToolkit/gitapi/utils.py +0 -62
  28. webscout/Extra/YTToolkit/YTdownloader.py +0 -957
  29. webscout/Extra/YTToolkit/__init__.py +0 -3
  30. webscout/Extra/YTToolkit/transcriber.py +0 -476
  31. webscout/Extra/YTToolkit/ytapi/__init__.py +0 -6
  32. webscout/Extra/YTToolkit/ytapi/channel.py +0 -307
  33. webscout/Extra/YTToolkit/ytapi/errors.py +0 -13
  34. webscout/Extra/YTToolkit/ytapi/extras.py +0 -45
  35. webscout/Extra/YTToolkit/ytapi/https.py +0 -88
  36. webscout/Extra/YTToolkit/ytapi/patterns.py +0 -61
  37. webscout/Extra/YTToolkit/ytapi/playlist.py +0 -59
  38. webscout/Extra/YTToolkit/ytapi/pool.py +0 -8
  39. webscout/Extra/YTToolkit/ytapi/query.py +0 -40
  40. webscout/Extra/YTToolkit/ytapi/stream.py +0 -63
  41. webscout/Extra/YTToolkit/ytapi/utils.py +0 -62
  42. webscout/Extra/YTToolkit/ytapi/video.py +0 -232
  43. webscout/Extra/__init__.py +0 -7
  44. webscout/Extra/autocoder/__init__.py +0 -9
  45. webscout/Extra/autocoder/autocoder.py +0 -849
  46. webscout/Extra/autocoder/autocoder_utiles.py +0 -332
  47. webscout/Extra/gguf.py +0 -682
  48. webscout/Extra/tempmail/__init__.py +0 -28
  49. webscout/Extra/tempmail/async_utils.py +0 -141
  50. webscout/Extra/tempmail/base.py +0 -161
  51. webscout/Extra/tempmail/cli.py +0 -187
  52. webscout/Extra/tempmail/emailnator.py +0 -84
  53. webscout/Extra/tempmail/mail_tm.py +0 -361
  54. webscout/Extra/tempmail/temp_mail_io.py +0 -292
  55. webscout/Extra/weather.py +0 -194
  56. webscout/Extra/weather_ascii.py +0 -76
  57. webscout/LLM.py +0 -442
  58. webscout/Litlogger/__init__.py +0 -67
  59. webscout/Litlogger/core/__init__.py +0 -6
  60. webscout/Litlogger/core/level.py +0 -23
  61. webscout/Litlogger/core/logger.py +0 -165
  62. webscout/Litlogger/handlers/__init__.py +0 -12
  63. webscout/Litlogger/handlers/console.py +0 -33
  64. webscout/Litlogger/handlers/file.py +0 -143
  65. webscout/Litlogger/handlers/network.py +0 -173
  66. webscout/Litlogger/styles/__init__.py +0 -7
  67. webscout/Litlogger/styles/colors.py +0 -249
  68. webscout/Litlogger/styles/formats.py +0 -458
  69. webscout/Litlogger/styles/text.py +0 -87
  70. webscout/Litlogger/utils/__init__.py +0 -6
  71. webscout/Litlogger/utils/detectors.py +0 -153
  72. webscout/Litlogger/utils/formatters.py +0 -200
  73. webscout/Local/__init__.py +0 -12
  74. webscout/Local/__main__.py +0 -9
  75. webscout/Local/api.py +0 -576
  76. webscout/Local/cli.py +0 -516
  77. webscout/Local/config.py +0 -75
  78. webscout/Local/llm.py +0 -287
  79. webscout/Local/model_manager.py +0 -253
  80. webscout/Local/server.py +0 -721
  81. webscout/Local/utils.py +0 -93
  82. webscout/Provider/AI21.py +0 -177
  83. webscout/Provider/AISEARCH/DeepFind.py +0 -250
  84. webscout/Provider/AISEARCH/ISou.py +0 -256
  85. webscout/Provider/AISEARCH/Perplexity.py +0 -359
  86. webscout/Provider/AISEARCH/__init__.py +0 -10
  87. webscout/Provider/AISEARCH/felo_search.py +0 -228
  88. webscout/Provider/AISEARCH/genspark_search.py +0 -208
  89. webscout/Provider/AISEARCH/hika_search.py +0 -194
  90. webscout/Provider/AISEARCH/iask_search.py +0 -436
  91. webscout/Provider/AISEARCH/monica_search.py +0 -246
  92. webscout/Provider/AISEARCH/scira_search.py +0 -324
  93. webscout/Provider/AISEARCH/webpilotai_search.py +0 -281
  94. webscout/Provider/Aitopia.py +0 -292
  95. webscout/Provider/AllenAI.py +0 -413
  96. webscout/Provider/Andi.py +0 -228
  97. webscout/Provider/Blackboxai.py +0 -229
  98. webscout/Provider/C4ai.py +0 -432
  99. webscout/Provider/ChatGPTClone.py +0 -226
  100. webscout/Provider/ChatGPTES.py +0 -237
  101. webscout/Provider/ChatGPTGratis.py +0 -194
  102. webscout/Provider/Chatify.py +0 -175
  103. webscout/Provider/Cloudflare.py +0 -273
  104. webscout/Provider/Cohere.py +0 -208
  105. webscout/Provider/DeepSeek.py +0 -196
  106. webscout/Provider/Deepinfra.py +0 -297
  107. webscout/Provider/ElectronHub.py +0 -709
  108. webscout/Provider/ExaAI.py +0 -261
  109. webscout/Provider/ExaChat.py +0 -342
  110. webscout/Provider/Free2GPT.py +0 -241
  111. webscout/Provider/GPTWeb.py +0 -193
  112. webscout/Provider/Gemini.py +0 -169
  113. webscout/Provider/GithubChat.py +0 -367
  114. webscout/Provider/Glider.py +0 -211
  115. webscout/Provider/Groq.py +0 -670
  116. webscout/Provider/HF_space/__init__.py +0 -0
  117. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  118. webscout/Provider/HeckAI.py +0 -233
  119. webscout/Provider/HuggingFaceChat.py +0 -462
  120. webscout/Provider/Hunyuan.py +0 -272
  121. webscout/Provider/Jadve.py +0 -266
  122. webscout/Provider/Koboldai.py +0 -381
  123. webscout/Provider/LambdaChat.py +0 -392
  124. webscout/Provider/Llama.py +0 -200
  125. webscout/Provider/Llama3.py +0 -204
  126. webscout/Provider/Marcus.py +0 -148
  127. webscout/Provider/Netwrck.py +0 -228
  128. webscout/Provider/OLLAMA.py +0 -396
  129. webscout/Provider/OPENAI/__init__.py +0 -25
  130. webscout/Provider/OPENAI/base.py +0 -46
  131. webscout/Provider/OPENAI/c4ai.py +0 -367
  132. webscout/Provider/OPENAI/chatgpt.py +0 -549
  133. webscout/Provider/OPENAI/chatgptclone.py +0 -460
  134. webscout/Provider/OPENAI/deepinfra.py +0 -272
  135. webscout/Provider/OPENAI/e2b.py +0 -1350
  136. webscout/Provider/OPENAI/exaai.py +0 -404
  137. webscout/Provider/OPENAI/exachat.py +0 -433
  138. webscout/Provider/OPENAI/freeaichat.py +0 -352
  139. webscout/Provider/OPENAI/glider.py +0 -316
  140. webscout/Provider/OPENAI/heckai.py +0 -337
  141. webscout/Provider/OPENAI/llmchatco.py +0 -327
  142. webscout/Provider/OPENAI/netwrck.py +0 -348
  143. webscout/Provider/OPENAI/opkfc.py +0 -488
  144. webscout/Provider/OPENAI/scirachat.py +0 -463
  145. webscout/Provider/OPENAI/sonus.py +0 -294
  146. webscout/Provider/OPENAI/standardinput.py +0 -425
  147. webscout/Provider/OPENAI/textpollinations.py +0 -285
  148. webscout/Provider/OPENAI/toolbaz.py +0 -405
  149. webscout/Provider/OPENAI/typegpt.py +0 -346
  150. webscout/Provider/OPENAI/uncovrAI.py +0 -455
  151. webscout/Provider/OPENAI/utils.py +0 -211
  152. webscout/Provider/OPENAI/venice.py +0 -413
  153. webscout/Provider/OPENAI/wisecat.py +0 -381
  154. webscout/Provider/OPENAI/writecream.py +0 -156
  155. webscout/Provider/OPENAI/x0gpt.py +0 -371
  156. webscout/Provider/OPENAI/yep.py +0 -327
  157. webscout/Provider/OpenGPT.py +0 -199
  158. webscout/Provider/Openai.py +0 -496
  159. webscout/Provider/PI.py +0 -344
  160. webscout/Provider/Perplexitylabs.py +0 -415
  161. webscout/Provider/Phind.py +0 -535
  162. webscout/Provider/PizzaGPT.py +0 -198
  163. webscout/Provider/QwenLM.py +0 -254
  164. webscout/Provider/Reka.py +0 -214
  165. webscout/Provider/StandardInput.py +0 -278
  166. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  167. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  168. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  169. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  170. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  171. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  172. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  173. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  174. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  175. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  176. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  177. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  178. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  179. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  180. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  181. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  182. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  183. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  184. webscout/Provider/TTI/__init__.py +0 -12
  185. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  186. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  187. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  188. webscout/Provider/TTI/artbit/__init__.py +0 -22
  189. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  190. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  191. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  192. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  193. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  194. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  195. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  196. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  197. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  198. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  199. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  200. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  201. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  202. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  203. webscout/Provider/TTI/talkai/__init__.py +0 -4
  204. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  205. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  206. webscout/Provider/TTS/__init__.py +0 -7
  207. webscout/Provider/TTS/deepgram.py +0 -156
  208. webscout/Provider/TTS/elevenlabs.py +0 -111
  209. webscout/Provider/TTS/gesserit.py +0 -127
  210. webscout/Provider/TTS/murfai.py +0 -113
  211. webscout/Provider/TTS/parler.py +0 -111
  212. webscout/Provider/TTS/speechma.py +0 -180
  213. webscout/Provider/TTS/streamElements.py +0 -333
  214. webscout/Provider/TTS/utils.py +0 -280
  215. webscout/Provider/TeachAnything.py +0 -187
  216. webscout/Provider/TextPollinationsAI.py +0 -231
  217. webscout/Provider/TwoAI.py +0 -199
  218. webscout/Provider/Venice.py +0 -219
  219. webscout/Provider/VercelAI.py +0 -234
  220. webscout/Provider/WebSim.py +0 -228
  221. webscout/Provider/WiseCat.py +0 -196
  222. webscout/Provider/Writecream.py +0 -211
  223. webscout/Provider/WritingMate.py +0 -197
  224. webscout/Provider/Youchat.py +0 -330
  225. webscout/Provider/__init__.py +0 -198
  226. webscout/Provider/ai4chat.py +0 -202
  227. webscout/Provider/aimathgpt.py +0 -189
  228. webscout/Provider/akashgpt.py +0 -342
  229. webscout/Provider/askmyai.py +0 -158
  230. webscout/Provider/asksteve.py +0 -203
  231. webscout/Provider/bagoodex.py +0 -145
  232. webscout/Provider/cerebras.py +0 -242
  233. webscout/Provider/chatglm.py +0 -205
  234. webscout/Provider/cleeai.py +0 -213
  235. webscout/Provider/copilot.py +0 -428
  236. webscout/Provider/elmo.py +0 -234
  237. webscout/Provider/freeaichat.py +0 -271
  238. webscout/Provider/gaurish.py +0 -244
  239. webscout/Provider/geminiapi.py +0 -208
  240. webscout/Provider/geminiprorealtime.py +0 -160
  241. webscout/Provider/granite.py +0 -187
  242. webscout/Provider/hermes.py +0 -219
  243. webscout/Provider/julius.py +0 -223
  244. webscout/Provider/koala.py +0 -268
  245. webscout/Provider/labyrinth.py +0 -340
  246. webscout/Provider/learnfastai.py +0 -266
  247. webscout/Provider/lepton.py +0 -194
  248. webscout/Provider/llama3mitril.py +0 -180
  249. webscout/Provider/llamatutor.py +0 -192
  250. webscout/Provider/llmchat.py +0 -213
  251. webscout/Provider/llmchatco.py +0 -311
  252. webscout/Provider/meta.py +0 -794
  253. webscout/Provider/multichat.py +0 -325
  254. webscout/Provider/promptrefine.py +0 -193
  255. webscout/Provider/scira_chat.py +0 -277
  256. webscout/Provider/scnet.py +0 -187
  257. webscout/Provider/searchchat.py +0 -293
  258. webscout/Provider/sonus.py +0 -208
  259. webscout/Provider/talkai.py +0 -194
  260. webscout/Provider/toolbaz.py +0 -320
  261. webscout/Provider/turboseek.py +0 -219
  262. webscout/Provider/tutorai.py +0 -252
  263. webscout/Provider/typefully.py +0 -280
  264. webscout/Provider/typegpt.py +0 -232
  265. webscout/Provider/uncovr.py +0 -312
  266. webscout/Provider/x0gpt.py +0 -256
  267. webscout/Provider/yep.py +0 -376
  268. webscout/litagent/__init__.py +0 -29
  269. webscout/litagent/agent.py +0 -455
  270. webscout/litagent/constants.py +0 -60
  271. webscout/litprinter/__init__.py +0 -59
  272. webscout/scout/__init__.py +0 -8
  273. webscout/scout/core/__init__.py +0 -7
  274. webscout/scout/core/crawler.py +0 -140
  275. webscout/scout/core/scout.py +0 -568
  276. webscout/scout/core/search_result.py +0 -96
  277. webscout/scout/core/text_analyzer.py +0 -63
  278. webscout/scout/core/text_utils.py +0 -277
  279. webscout/scout/core/web_analyzer.py +0 -52
  280. webscout/scout/core.py +0 -881
  281. webscout/scout/element.py +0 -460
  282. webscout/scout/parsers/__init__.py +0 -69
  283. webscout/scout/parsers/html5lib_parser.py +0 -172
  284. webscout/scout/parsers/html_parser.py +0 -236
  285. webscout/scout/parsers/lxml_parser.py +0 -178
  286. webscout/scout/utils.py +0 -37
  287. webscout/swiftcli/__init__.py +0 -809
  288. webscout/zeroart/__init__.py +0 -55
  289. webscout/zeroart/base.py +0 -60
  290. webscout/zeroart/effects.py +0 -99
  291. webscout/zeroart/fonts.py +0 -816
  292. webscout-8.2.2.dist-info/RECORD +0 -309
  293. webscout-8.2.2.dist-info/entry_points.txt +0 -5
  294. webscout-8.2.2.dist-info/top_level.txt +0 -3
  295. webstoken/__init__.py +0 -30
  296. webstoken/classifier.py +0 -189
  297. webstoken/keywords.py +0 -216
  298. webstoken/language.py +0 -128
  299. webstoken/ner.py +0 -164
  300. webstoken/normalizer.py +0 -35
  301. webstoken/processor.py +0 -77
  302. webstoken/sentiment.py +0 -206
  303. webstoken/stemmer.py +0 -73
  304. webstoken/tagger.py +0 -60
  305. webstoken/tokenizer.py +0 -158
  306. {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info/licenses}/LICENSE.md +0 -0
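The per-file listings that follow reproduce the deleted webstoken modules in full. To sanity-check a file-level comparison like the table above yourself, one approach (a minimal sketch of an assumed workflow, not this page's own tooling) is to download both wheels with pip and compare their archive members using the standard-library zipfile module:

import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path

def fetch_wheel(version: str, dest: Path) -> Path:
    """Download a single wheel (no dependencies, no sdists) into dest and return its path."""
    subprocess.run(
        [sys.executable, "-m", "pip", "download", f"webscout=={version}",
         "--no-deps", "--only-binary", ":all:", "--dest", str(dest)],
        check=True,
    )
    return next(dest.glob(f"webscout-{version}-*.whl"))

def wheel_contents(whl: Path) -> dict:
    """Map each archive member name to its raw bytes."""
    with zipfile.ZipFile(whl) as zf:
        return {name: zf.read(name) for name in zf.namelist()}

with tempfile.TemporaryDirectory() as tmp:
    old = wheel_contents(fetch_wheel("8.2.2", Path(tmp)))
    new = wheel_contents(fetch_wheel("8.2.7", Path(tmp)))
    for name in sorted(old.keys() | new.keys()):
        if old.get(name) != new.get(name):
            # "deleted" should cover the webstoken/ modules shown below
            status = ("added" if name not in old
                      else "deleted" if name not in new
                      else "changed")
            print(f"{status:8} {name}")

This only reports which files differ; line counts like the +/- figures above would additionally require diffing the decoded text of each changed member.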
webstoken/sentiment.py DELETED
@@ -1,206 +0,0 @@
- """
- Sentiment analysis module for determining text sentiment and emotion.
- """
-
- from typing import Dict, List, Set, Tuple
- import re
-
- from .tokenizer import WordTokenizer
- from .normalizer import TextNormalizer
-
-
- class SentimentAnalyzer:
-     """Rule-based sentiment analysis using lexicon approach."""
-
-     def __init__(self):
-         self.word_tokenizer = WordTokenizer()
-         self.normalizer = TextNormalizer()
-
-         # Sentiment lexicons
-         self.positive_words: Set[str] = {
-             'good', 'great', 'awesome', 'excellent', 'happy', 'wonderful',
-             'fantastic', 'amazing', 'love', 'beautiful', 'best', 'perfect',
-             'brilliant', 'outstanding', 'superb', 'nice', 'pleasant', 'delightful',
-             'positive', 'remarkable', 'terrific', 'incredible', 'enjoyable',
-             'favorable', 'marvelous', 'splendid', 'superior', 'worthy', 'right'
-         }
-
-         self.negative_words: Set[str] = {
-             'bad', 'terrible', 'awful', 'horrible', 'sad', 'poor', 'wrong',
-             'worse', 'worst', 'hate', 'dislike', 'disappointing', 'negative',
-             'inferior', 'useless', 'worthless', 'mediocre', 'inadequate',
-             'unpleasant', 'unfavorable', 'disagreeable', 'offensive', 'annoying',
-             'frustrating', 'irritating', 'disgusting', 'dreadful', 'pathetic'
-         }
-
-         # Emotion lexicons
-         self.emotion_words = {
-             'JOY': {
-                 'happy', 'joyful', 'delighted', 'excited', 'pleased', 'glad',
-                 'cheerful', 'content', 'satisfied', 'elated', 'jubilant',
-                 'thrilled', 'ecstatic', 'merry', 'peaceful', 'upbeat'
-             },
-             'SADNESS': {
-                 'sad', 'unhappy', 'depressed', 'gloomy', 'miserable', 'down',
-                 'heartbroken', 'disappointed', 'upset', 'distressed', 'grief',
-                 'sorrow', 'melancholy', 'despair', 'hopeless', 'blue'
-             },
-             'ANGER': {
-                 'angry', 'mad', 'furious', 'outraged', 'irritated', 'annoyed',
-                 'frustrated', 'enraged', 'hostile', 'bitter', 'hateful', 'rage',
-                 'resentful', 'violent', 'aggressive', 'irate'
-             },
-             'FEAR': {
-                 'afraid', 'scared', 'frightened', 'terrified', 'anxious', 'worried',
-                 'nervous', 'fearful', 'panicked', 'alarmed', 'horrified', 'dread',
-                 'uneasy', 'stressed', 'concerned', 'apprehensive'
-             },
-             'SURPRISE': {
-                 'surprised', 'amazed', 'astonished', 'shocked', 'stunned',
-                 'startled', 'unexpected', 'incredible', 'unbelievable', 'wonder',
-                 'awe', 'remarkable', 'mysterious', 'sudden', 'strange'
-             }
-         }
-
-         # Intensity modifiers
-         self.intensifiers = {
-             'very': 1.5,
-             'really': 1.5,
-             'extremely': 2.0,
-             'incredibly': 2.0,
-             'absolutely': 2.0,
-             'totally': 1.5,
-             'completely': 1.5,
-             'utterly': 2.0,
-             'highly': 1.5,
-             'especially': 1.5
-         }
-
-         self.diminishers = {
-             'somewhat': 0.5,
-             'slightly': 0.5,
-             'barely': 0.3,
-             'hardly': 0.3,
-             'sort of': 0.5,
-             'kind of': 0.5,
-             'a bit': 0.5,
-             'a little': 0.5,
-             'not very': 0.3,
-             'less': 0.5
-         }
-
-         # Negation words
-         self.negation_words = {
-             'not', 'no', 'never', 'none', 'nobody', 'nothing', 'neither',
-             'nowhere', 'hardly', 'scarcely', 'barely', "don't", "doesn't",
-             "didn't", "won't", "wouldn't", "shouldn't", "couldn't", "can't"
-         }
-
-         # Compile patterns
-         self.word_pattern = re.compile(r'\b\w+\b')
-
-     def _get_window_around_word(self, words: List[str], index: int, window_size: int = 3) -> List[str]:
-         """Get a window of words around a given index."""
-         start = max(0, index - window_size)
-         end = min(len(words), index + window_size + 1)
-         return words[start:end]
-
-     def _is_negated(self, words: List[str], index: int) -> bool:
-         """Check if a word is negated by looking at surrounding context."""
-         window = self._get_window_around_word(words, index)
-         return any(word in self.negation_words for word in window[:index-window[0]])
-
-     def _get_intensity_multiplier(self, words: List[str], index: int) -> float:
-         """Get intensity multiplier based on modifiers."""
-         window = self._get_window_around_word(words, index)
-         multiplier = 1.0
-
-         for word in window[:index-window[0]]:
-             if word in self.intensifiers:
-                 multiplier *= self.intensifiers[word]
-             elif word in self.diminishers:
-                 multiplier *= self.diminishers[word]
-
-         return multiplier
-
-     def analyze_sentiment(self, text: str) -> Dict[str, float]:
-         """
-         Analyze sentiment of text.
-
-         Returns:
-             Dict with sentiment scores:
-             {
-                 'polarity': float (-1 to 1),
-                 'subjectivity': float (0 to 1),
-                 'confidence': float (0 to 1)
-             }
-         """
-         # Normalize and tokenize text
-         text = self.normalizer.normalize(text)
-         words = self.word_tokenizer.tokenize(text)
-
-         positive_score = 0
-         negative_score = 0
-         word_count = len(words)
-
-         for i, word in enumerate(words):
-             word = word.lower()
-             multiplier = self._get_intensity_multiplier(words, i)
-             is_negated = self._is_negated(words, i)
-
-             if word in self.positive_words:
-                 score = 1.0 * multiplier
-                 positive_score += -score if is_negated else score
-             elif word in self.negative_words:
-                 score = 1.0 * multiplier
-                 negative_score += -score if is_negated else score
-
-         # Calculate metrics
-         total_score = positive_score + negative_score
-         total_magnitude = abs(positive_score) + abs(negative_score)
-
-         if word_count == 0:
-             return {'polarity': 0.0, 'subjectivity': 0.0, 'confidence': 0.0}
-
-         polarity = total_score / (word_count or 1)  # Normalize to [-1, 1]
-         subjectivity = total_magnitude / (word_count or 1)  # Normalize to [0, 1]
-         confidence = min(1.0, total_magnitude / (word_count / 2))  # Confidence based on magnitude
-
-         return {
-             'polarity': max(-1.0, min(1.0, polarity)),
-             'subjectivity': min(1.0, subjectivity),
-             'confidence': confidence
-         }
-
-     def analyze_emotions(self, text: str) -> List[Tuple[str, float]]:
-         """
-         Analyze emotions in text.
-
-         Returns:
-             List of (emotion, score) tuples, sorted by score
-         """
-         # Normalize and tokenize text
-         text = self.normalizer.normalize(text)
-         words = self.word_tokenizer.tokenize(text)
-
-         emotion_scores = {emotion: 0.0 for emotion in self.emotion_words}
-
-         for i, word in enumerate(words):
-             word = word.lower()
-             multiplier = self._get_intensity_multiplier(words, i)
-             is_negated = self._is_negated(words, i)
-
-             for emotion, emotion_set in self.emotion_words.items():
-                 if word in emotion_set:
-                     score = 1.0 * multiplier
-                     emotion_scores[emotion] += -score if is_negated else score
-
-         # Normalize scores
-         max_score = max(abs(score) for score in emotion_scores.values()) or 1
-         normalized_scores = [
-             (emotion, score/max_score)
-             for emotion, score in emotion_scores.items()
-         ]
-
-         # Sort by score
-         return sorted(normalized_scores, key=lambda x: x[1], reverse=True)
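The deleted SentimentAnalyzer scores text by counting lexicon hits, flipping a hit's sign when a negation word appears in the surrounding window and scaling it by intensifier/diminisher weights. Note that the class cannot run as shipped: _is_negated and _get_intensity_multiplier slice with window[:index-window[0]], which subtracts a string from an integer, so analyze_sentiment raises a TypeError on any nonempty input. The following is a minimal, self-contained sketch of the same lexicon-plus-negation idea (tiny illustrative lexicons, regex tokenization, left-context window only), not a drop-in replacement for the deleted API:

import re

POSITIVE = {'good', 'great', 'love', 'excellent'}
NEGATIVE = {'bad', 'terrible', 'hate', 'awful'}
NEGATIONS = {'not', 'no', 'never', "don't"}
INTENSIFIERS = {'very': 1.5, 'extremely': 2.0}

def polarity(text: str) -> float:
    """Lexicon polarity in [-1, 1]: +1 per positive hit, -1 per negative hit,
    sign flipped if a negation word occurs in the three words before the hit,
    scaled by any intensifier in that window, then length-normalized."""
    words = re.findall(r"[\w']+", text.lower())
    score = 0.0
    for i, word in enumerate(words):
        if word not in POSITIVE and word not in NEGATIVE:
            continue
        window = words[max(0, i - 3):i]           # left context only
        hit = 1.0 if word in POSITIVE else -1.0
        for w in window:
            hit *= INTENSIFIERS.get(w, 1.0)       # 'very good' counts more
        if any(w in NEGATIONS for w in window):   # 'not good' flips the sign
            hit = -hit
        score += hit
    return max(-1.0, min(1.0, score / (len(words) or 1)))

print(polarity("the food was very good"))   # positive (0.3)
print(polarity("the food was not good"))    # negative (-0.2)

The division by len(words) mirrors the deleted module's own normalization, polarity = total_score / word_count.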
webstoken/stemmer.py DELETED
@@ -1,73 +0,0 @@
- """
- Word stemming utilities.
- """
-
- from typing import Set
-
-
- class Stemmer:
-     """Simple rule-based stemmer implementing Porter-like rules."""
-
-     def __init__(self):
-         self.vowels: Set[str] = {'a', 'e', 'i', 'o', 'u', 'y'}
-         self.doubles: Set[str] = {'bb', 'dd', 'ff', 'gg', 'mm', 'nn', 'pp', 'rr', 'tt'}
-
-     def is_vowel(self, char: str, prev_char: str = None) -> bool:
-         """Check if a character is a vowel, considering 'y' special cases."""
-         return char in self.vowels or (char == 'y' and prev_char and prev_char not in self.vowels)
-
-     def count_syllables(self, word: str) -> int:
-         """Count syllables in a word based on vowel sequences."""
-         count = 0
-         prev_char = None
-         for i, char in enumerate(word.lower()):
-             if self.is_vowel(char, prev_char) and (i == 0 or not self.is_vowel(prev_char, word[i-2] if i > 1 else None)):
-                 count += 1
-             prev_char = char
-         return count or 1
-
-     def stem(self, word: str) -> str:
-         """Apply stemming rules to reduce word to its root form."""
-         if len(word) <= 3:
-             return word
-
-         word = word.lower()
-
-         # Step 1: Handle plurals and past participles
-         if word.endswith('sses'):
-             word = word[:-2]
-         elif word.endswith('ies'):
-             word = word[:-2]
-         elif word.endswith('ss'):
-             pass
-         elif word.endswith('s') and len(word) > 4:
-             word = word[:-1]
-
-         # Step 2: Handle -ed and -ing
-         if word.endswith('ed') and self.count_syllables(word[:-2]) > 1:
-             word = word[:-2]
-         elif word.endswith('ing') and self.count_syllables(word[:-3]) > 1:
-             word = word[:-3]
-
-         # Step 3: Handle miscellaneous endings
-         if len(word) > 5:
-             if word.endswith('ement'):
-                 word = word[:-5]
-             elif word.endswith('ment'):
-                 word = word[:-4]
-             elif word.endswith('ent'):
-                 word = word[:-3]
-
-         # Step 4: Handle -ity endings
-         if word.endswith('ity') and len(word) > 6:
-             word = word[:-3]
-             if word.endswith('abil'):
-                 word = word[:-4] + 'able'
-             elif word.endswith('ic'):
-                 word = word[:-2]
-
-         # Final step: Remove double consonants at the end
-         if len(word) > 2 and word[-2:] in self.doubles:
-             word = word[:-1]
-
-         return word
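Unlike SentimentAnalyzer, the deleted Stemmer is self-contained (it imports only typing) and runs as written, whether copied out of the diff or imported from an installed webscout 8.2.2, where webstoken still shipped as a top-level package. A usage sketch, with outputs traced by hand through the four rule steps above:

# Assumes the Stemmer class from the deleted webstoken/stemmer.py above is in
# scope (e.g. `from webstoken.stemmer import Stemmer` on webscout==8.2.2, or
# simply copy-pasted from the diff).
stemmer = Stemmer()

print(stemmer.stem("tables"))     # 'table'   (step 1: strip plural -s)
print(stemmer.stem("visiting"))   # 'visit'   (step 2: strip -ing; base has >1 syllable)
print(stemmer.stem("agreement"))  # 'agre'    (step 3: strip -ement)
print(stemmer.stem("ability"))    # 'able'    (step 4: -ity, then abil -> able)
print(stemmer.stem("running"))    # 'running' (unchanged: 'runn' counts as one syllable)

The last case shows the guard in step 2: -ed/-ing are stripped only when the remaining base has more than one syllable, so many short inflected forms pass through untouched.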
webstoken/tagger.py DELETED
@@ -1,60 +0,0 @@
- """
- Part-of-Speech tagging utilities.
- """
-
- from typing import List, Set, Tuple
-
-
- class POSTagger:
-     """Simple rule-based Part-of-Speech tagger."""
-
-     def __init__(self):
-         # Basic rules for POS tagging
-         self.noun_suffixes: Set[str] = {'ness', 'ment', 'ship', 'dom', 'hood', 'er', 'or', 'ist'}
-         self.verb_suffixes: Set[str] = {'ize', 'ate', 'ify', 'ing', 'ed'}
-         self.adj_suffixes: Set[str] = {'able', 'ible', 'al', 'ful', 'ous', 'ive', 'less'}
-         self.adv_suffixes: Set[str] = {'ly'}
-
-         # Common words by POS
-         self.determiners: Set[str] = {'the', 'a', 'an', 'this', 'that', 'these', 'those'}
-         self.prepositions: Set[str] = {'in', 'on', 'at', 'by', 'with', 'from', 'to', 'for'}
-         self.pronouns: Set[str] = {'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her'}
-
-     def tag(self, tokens: List[str]) -> List[Tuple[str, str]]:
-         """Assign POS tags to tokens based on rules."""
-         tagged = []
-         prev_tag = None
-
-         for i, token in enumerate(tokens):
-             word = token.lower()
-
-             # Check special cases first
-             if word in self.determiners:
-                 tag = 'DET'
-             elif word in self.prepositions:
-                 tag = 'PREP'
-             elif word in self.pronouns:
-                 tag = 'PRON'
-             # Check suffixes
-             elif any(word.endswith(suffix) for suffix in self.noun_suffixes):
-                 tag = 'NOUN'
-             elif any(word.endswith(suffix) for suffix in self.verb_suffixes):
-                 tag = 'VERB'
-             elif any(word.endswith(suffix) for suffix in self.adj_suffixes):
-                 tag = 'ADJ'
-             elif any(word.endswith(suffix) for suffix in self.adv_suffixes):
-                 tag = 'ADV'
-             # Default cases
-             elif word[0].isupper() and i > 0:
-                 tag = 'PROPN'  # Proper noun
-             elif word.isdigit():
-                 tag = 'NUM'
-             elif not word.isalnum():
-                 tag = 'PUNCT'
-             else:
-                 tag = 'NOUN'  # Default to noun
-
-             tagged.append((token, tag))
-             prev_tag = tag
-
-         return tagged
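POSTagger is likewise runnable as written. One quirk worth noting: tag lowercases each token before checking word[0].isupper(), so the PROPN branch can never fire, and capitalized words fall through to the suffix rules or the NOUN default. A quick sketch (assuming the class from the diff above is in scope):

# Tag a short sentence with the rule-based tagger from the deleted
# webstoken/tagger.py shown above.
tagger = POSTagger()

print(tagger.tag(["The", "quick", "runner", "jumped", "quickly", "."]))
# [('The', 'DET'), ('quick', 'NOUN'), ('runner', 'NOUN'),
#  ('jumped', 'VERB'), ('quickly', 'ADV'), ('.', 'PUNCT')]
# 'quick' hits no suffix rule and falls through to the NOUN default;
# 'runner' matches the -er noun suffix, 'jumped' the -ed verb suffix.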
webstoken/tokenizer.py DELETED
@@ -1,158 +0,0 @@
- """
- Tokenization utilities for sentence and word-level tokenization.
- """
-
- from typing import List, Dict, Set, Pattern
- import re
-
-
- class SentenceTokenizer:
-     """Advanced sentence tokenizer with support for complex cases and proper formatting."""
-
-     def __init__(self) -> None:
-         # Common abbreviations by category
-         self.TITLES: Set[str] = {
-             'mr', 'mrs', 'ms', 'dr', 'prof', 'rev', 'sr', 'jr', 'esq',
-             'hon', 'pres', 'gov', 'atty', 'supt', 'det', 'rev', 'col', 'maj', 'gen', 'capt', 'cmdr',
-             'lt', 'sgt', 'cpl', 'pvt'
-         }
-
-         self.ACADEMIC: Set[str] = {
-             'ph.d', 'phd', 'm.d', 'md', 'b.a', 'ba', 'm.a', 'ma', 'd.d.s', 'dds',
-             'm.b.a', 'mba', 'b.sc', 'bsc', 'm.sc', 'msc', 'llb', 'll.b', 'bl'
-         }
-
-         self.ORGANIZATIONS: Set[str] = {
-             'inc', 'ltd', 'co', 'corp', 'llc', 'llp', 'assn', 'bros', 'plc', 'cos',
-             'intl', 'dept', 'est', 'dist', 'mfg', 'div'
-         }
-
-         self.MONTHS: Set[str] = {
-             'jan', 'feb', 'mar', 'apr', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'
-         }
-
-         self.UNITS: Set[str] = {
-             'oz', 'pt', 'qt', 'gal', 'ml', 'cc', 'km', 'cm', 'mm', 'ft', 'in',
-             'kg', 'lb', 'lbs', 'hz', 'khz', 'mhz', 'ghz', 'kb', 'mb', 'gb', 'tb'
-         }
-
-         self.TECHNOLOGY: Set[str] = {
-             'v', 'ver', 'app', 'sys', 'dir', 'exe', 'lib', 'api', 'sdk', 'url',
-             'cpu', 'gpu', 'ram', 'rom', 'hdd', 'ssd', 'lan', 'wan', 'sql', 'html'
-         }
-
-         self.MISC: Set[str] = {
-             'vs', 'etc', 'ie', 'eg', 'no', 'al', 'ca', 'cf', 'pp', 'est', 'st',
-             'approx', 'appt', 'apt', 'dept', 'depts', 'min', 'max', 'avg'
-         }
-
-         # Combine all abbreviations
-         self.all_abbreviations: Set[str] = (
-             self.TITLES | self.ACADEMIC | self.ORGANIZATIONS |
-             self.MONTHS | self.UNITS | self.TECHNOLOGY | self.MISC
-         )
-
-         # Special patterns
-         self.ELLIPSIS: str = r'\.{2,}|…'
-         self.URL_PATTERN: str = (
-             r'(?:https?:\/\/|www\.)[\w\-\.]+\.[a-zA-Z]{2,}(?:\/[^\s]*)?'
-         )
-         self.EMAIL_PATTERN: str = r'[\w\.-]+@[\w\.-]+\.\w+'
-         self.NUMBER_PATTERN: str = (
-             r'\d+(?:\.\d+)?(?:%|°|km|cm|mm|m|kg|g|lb|ft|in|mph|kmh|hz|mhz|ghz)?'
-         )
-
-         # Quote and bracket pairs
-         self.QUOTE_PAIRS: Dict[str, str] = {
-             '"': '"', "'": "'", '“': '”', "「": "」", "『": "』",
-             "«": "»", "‹": "›", "‘": "’", "‚": "'"
-         }
-
-         self.BRACKETS: Dict[str, str] = {
-             '(': ')', '[': ']', '{': '}', '⟨': '⟩', '「': '」',
-             '『': '』', '【': '】', '〖': '〗', '「': '」'
-         }
-
-         # Compile regex patterns
-         self._compile_patterns()
-
-     def _compile_patterns(self) -> None:
-         """Compile regex patterns for better performance."""
-         # Pattern for finding potential sentence boundaries
-         self.SENTENCE_END: Pattern = re.compile(
-             r'''
-             # Group for sentence endings
-             (?:
-                 # Standard endings with optional quotes/brackets
-                 (?<=[.!?])[\"\'\)\]\}»›」』\s]*
-
-                 # Ellipsis
-                 |(?:\.{2,}|…)
-
-                 # Asian-style endings
-                 |(?<=[。!?」』】\s])
-             )
-
-             # Must be followed by whitespace and capital letter or number
-             (?=\s+(?:[A-Z0-9]|["'({[\[「『《‹〈][A-Z]))
-             ''',
-             re.VERBOSE
-         )
-
-         # Pattern for abbreviations
-         abbrev_pattern = '|'.join(re.escape(abbr) for abbr in self.all_abbreviations)
-         self.ABBREV_PATTERN: Pattern = re.compile(
-             fr'\b(?:{abbrev_pattern})\.?',
-             re.IGNORECASE
-         )
-
-     def tokenize(self, text: str) -> List[str]:
-         """Split text into sentences while handling complex cases."""
-         if not text or not text.strip():
-             return []
-
-         # Initial split on potential sentence boundaries
-         sentences = self.SENTENCE_END.split(text)
-
-         # Clean and validate sentences
-         final_sentences = []
-         for sentence in sentences:
-             sentence = sentence.strip()
-             if sentence:
-                 final_sentences.append(sentence)
-
-         return final_sentences
-
-
- class WordTokenizer:
-     """Simple but effective word tokenizer with support for contractions and special cases."""
-
-     def __init__(self):
-         self.contractions = {
-             "n't": "not", "'ll": "will", "'re": "are", "'s": "is",
-             "'m": "am", "'ve": "have", "'d": "would"
-         }
-
-         self.word_pattern = re.compile(r"""
-             (?:[A-Za-z]+(?:['’][A-Za-z]+)*)|   # Words with optional internal apostrophes
-             (?:\d+(?:,\d{3})*(?:\.\d+)?)|      # Numbers with commas and decimals
-             (?:[@#]?\w+)|                      # Hashtags and mentions
-             (?:[^\w\s])                        # Punctuation and symbols
-         """, re.VERBOSE)
-
-     def tokenize(self, text: str) -> List[str]:
-         """Split text into words while handling contractions and special cases."""
-         tokens = []
-         for match in self.word_pattern.finditer(text):
-             word = match.group()
-             # Handle contractions
-             for contraction, expansion in self.contractions.items():
-                 if word.endswith(contraction):
-                     base = word[:-len(contraction)]
-                     if base:
-                         tokens.append(base)
-                     tokens.append(expansion)
-                     break
-             else:
-                 tokens.append(word)
-         return tokens
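WordTokenizer expands contractions while it tokenizes, so "don't" comes out as the pair do/not rather than as a single token. A usage sketch (assuming the class from the diff above is in scope, and that the apostrophe character class in word_pattern matches straight quotes as shown):

# Tokenize a sentence with the WordTokenizer from the deleted
# webstoken/tokenizer.py shown above.
tokenizer = WordTokenizer()

print(tokenizer.tokenize("I don't like it, it's overpriced."))
# ['I', 'do', 'not', 'like', 'it', ',', 'it', 'is', 'overpriced', '.']
# "don't" -> 'do' + 'not' via the n't rule; "it's" -> 'it' + 'is' via the 's rule;
# punctuation falls out of the (?:[^\w\s]) branch as standalone tokens.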