webscout 8.2.2__py3-none-any.whl → 2026.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (483) hide show
  1. webscout/AIauto.py +524 -143
  2. webscout/AIbase.py +247 -123
  3. webscout/AIutel.py +68 -132
  4. webscout/Bard.py +1072 -535
  5. webscout/Extra/GitToolkit/__init__.py +2 -2
  6. webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
  7. webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
  8. webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
  9. webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
  10. webscout/Extra/GitToolkit/gitapi/search.py +162 -0
  11. webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
  12. webscout/Extra/GitToolkit/gitapi/user.py +128 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
  14. webscout/Extra/YTToolkit/README.md +443 -0
  15. webscout/Extra/YTToolkit/YTdownloader.py +953 -957
  16. webscout/Extra/YTToolkit/__init__.py +3 -3
  17. webscout/Extra/YTToolkit/transcriber.py +595 -476
  18. webscout/Extra/YTToolkit/ytapi/README.md +230 -0
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
  20. webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
  21. webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
  22. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  23. webscout/Extra/YTToolkit/ytapi/extras.py +178 -45
  24. webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
  25. webscout/Extra/YTToolkit/ytapi/https.py +89 -88
  26. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  27. webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
  28. webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
  29. webscout/Extra/YTToolkit/ytapi/query.py +143 -40
  30. webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
  31. webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
  32. webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
  33. webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
  34. webscout/Extra/YTToolkit/ytapi/video.py +189 -18
  35. webscout/Extra/__init__.py +2 -3
  36. webscout/Extra/gguf.py +1298 -682
  37. webscout/Extra/tempmail/README.md +488 -0
  38. webscout/Extra/tempmail/__init__.py +28 -28
  39. webscout/Extra/tempmail/async_utils.py +143 -141
  40. webscout/Extra/tempmail/base.py +172 -161
  41. webscout/Extra/tempmail/cli.py +191 -187
  42. webscout/Extra/tempmail/emailnator.py +88 -84
  43. webscout/Extra/tempmail/mail_tm.py +378 -361
  44. webscout/Extra/tempmail/temp_mail_io.py +304 -292
  45. webscout/Extra/weather.py +196 -194
  46. webscout/Extra/weather_ascii.py +17 -15
  47. webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
  48. webscout/Provider/AISEARCH/Perplexity.py +237 -304
  49. webscout/Provider/AISEARCH/README.md +106 -0
  50. webscout/Provider/AISEARCH/__init__.py +16 -10
  51. webscout/Provider/AISEARCH/brave_search.py +298 -0
  52. webscout/Provider/AISEARCH/iask_search.py +130 -209
  53. webscout/Provider/AISEARCH/monica_search.py +200 -246
  54. webscout/Provider/AISEARCH/webpilotai_search.py +242 -281
  55. webscout/Provider/Algion.py +413 -0
  56. webscout/Provider/Andi.py +74 -69
  57. webscout/Provider/Apriel.py +313 -0
  58. webscout/Provider/Ayle.py +323 -0
  59. webscout/Provider/ChatSandbox.py +329 -0
  60. webscout/Provider/ClaudeOnline.py +365 -0
  61. webscout/Provider/Cohere.py +232 -208
  62. webscout/Provider/DeepAI.py +367 -0
  63. webscout/Provider/Deepinfra.py +343 -173
  64. webscout/Provider/EssentialAI.py +217 -0
  65. webscout/Provider/ExaAI.py +274 -261
  66. webscout/Provider/Gemini.py +60 -54
  67. webscout/Provider/GithubChat.py +385 -367
  68. webscout/Provider/Gradient.py +286 -0
  69. webscout/Provider/Groq.py +556 -670
  70. webscout/Provider/HadadXYZ.py +323 -0
  71. webscout/Provider/HeckAI.py +392 -233
  72. webscout/Provider/HuggingFace.py +387 -0
  73. webscout/Provider/IBM.py +340 -0
  74. webscout/Provider/Jadve.py +317 -266
  75. webscout/Provider/K2Think.py +306 -0
  76. webscout/Provider/Koboldai.py +221 -381
  77. webscout/Provider/Netwrck.py +273 -228
  78. webscout/Provider/Nvidia.py +310 -0
  79. webscout/Provider/OPENAI/DeepAI.py +489 -0
  80. webscout/Provider/OPENAI/K2Think.py +423 -0
  81. webscout/Provider/OPENAI/PI.py +463 -0
  82. webscout/Provider/OPENAI/README.md +890 -0
  83. webscout/Provider/OPENAI/TogetherAI.py +405 -0
  84. webscout/Provider/OPENAI/TwoAI.py +255 -0
  85. webscout/Provider/OPENAI/__init__.py +148 -25
  86. webscout/Provider/OPENAI/ai4chat.py +348 -0
  87. webscout/Provider/OPENAI/akashgpt.py +436 -0
  88. webscout/Provider/OPENAI/algion.py +303 -0
  89. webscout/Provider/OPENAI/ayle.py +365 -0
  90. webscout/Provider/OPENAI/base.py +253 -46
  91. webscout/Provider/OPENAI/cerebras.py +296 -0
  92. webscout/Provider/OPENAI/chatgpt.py +514 -193
  93. webscout/Provider/OPENAI/chatsandbox.py +233 -0
  94. webscout/Provider/OPENAI/deepinfra.py +403 -272
  95. webscout/Provider/OPENAI/e2b.py +2370 -1350
  96. webscout/Provider/OPENAI/elmo.py +278 -0
  97. webscout/Provider/OPENAI/exaai.py +186 -138
  98. webscout/Provider/OPENAI/freeassist.py +446 -0
  99. webscout/Provider/OPENAI/gradient.py +448 -0
  100. webscout/Provider/OPENAI/groq.py +380 -0
  101. webscout/Provider/OPENAI/hadadxyz.py +292 -0
  102. webscout/Provider/OPENAI/heckai.py +100 -104
  103. webscout/Provider/OPENAI/huggingface.py +321 -0
  104. webscout/Provider/OPENAI/ibm.py +425 -0
  105. webscout/Provider/OPENAI/llmchat.py +253 -0
  106. webscout/Provider/OPENAI/llmchatco.py +378 -327
  107. webscout/Provider/OPENAI/meta.py +541 -0
  108. webscout/Provider/OPENAI/netwrck.py +110 -84
  109. webscout/Provider/OPENAI/nvidia.py +317 -0
  110. webscout/Provider/OPENAI/oivscode.py +348 -0
  111. webscout/Provider/OPENAI/openrouter.py +328 -0
  112. webscout/Provider/OPENAI/pydantic_imports.py +1 -0
  113. webscout/Provider/OPENAI/sambanova.py +397 -0
  114. webscout/Provider/OPENAI/sonus.py +126 -115
  115. webscout/Provider/OPENAI/textpollinations.py +218 -133
  116. webscout/Provider/OPENAI/toolbaz.py +136 -166
  117. webscout/Provider/OPENAI/typefully.py +419 -0
  118. webscout/Provider/OPENAI/typliai.py +279 -0
  119. webscout/Provider/OPENAI/utils.py +314 -211
  120. webscout/Provider/OPENAI/wisecat.py +103 -125
  121. webscout/Provider/OPENAI/writecream.py +185 -156
  122. webscout/Provider/OPENAI/x0gpt.py +227 -136
  123. webscout/Provider/OPENAI/zenmux.py +380 -0
  124. webscout/Provider/OpenRouter.py +386 -0
  125. webscout/Provider/Openai.py +337 -496
  126. webscout/Provider/PI.py +443 -344
  127. webscout/Provider/QwenLM.py +346 -254
  128. webscout/Provider/STT/__init__.py +28 -0
  129. webscout/Provider/STT/base.py +303 -0
  130. webscout/Provider/STT/elevenlabs.py +264 -0
  131. webscout/Provider/Sambanova.py +317 -0
  132. webscout/Provider/TTI/README.md +69 -0
  133. webscout/Provider/TTI/__init__.py +37 -12
  134. webscout/Provider/TTI/base.py +147 -0
  135. webscout/Provider/TTI/claudeonline.py +393 -0
  136. webscout/Provider/TTI/magicstudio.py +292 -0
  137. webscout/Provider/TTI/miragic.py +180 -0
  138. webscout/Provider/TTI/pollinations.py +331 -0
  139. webscout/Provider/TTI/together.py +334 -0
  140. webscout/Provider/TTI/utils.py +14 -0
  141. webscout/Provider/TTS/README.md +186 -0
  142. webscout/Provider/TTS/__init__.py +43 -7
  143. webscout/Provider/TTS/base.py +523 -0
  144. webscout/Provider/TTS/deepgram.py +286 -156
  145. webscout/Provider/TTS/elevenlabs.py +189 -111
  146. webscout/Provider/TTS/freetts.py +218 -0
  147. webscout/Provider/TTS/murfai.py +288 -113
  148. webscout/Provider/TTS/openai_fm.py +364 -0
  149. webscout/Provider/TTS/parler.py +203 -111
  150. webscout/Provider/TTS/qwen.py +334 -0
  151. webscout/Provider/TTS/sherpa.py +286 -0
  152. webscout/Provider/TTS/speechma.py +693 -180
  153. webscout/Provider/TTS/streamElements.py +275 -333
  154. webscout/Provider/TTS/utils.py +280 -280
  155. webscout/Provider/TextPollinationsAI.py +221 -121
  156. webscout/Provider/TogetherAI.py +450 -0
  157. webscout/Provider/TwoAI.py +309 -199
  158. webscout/Provider/TypliAI.py +311 -0
  159. webscout/Provider/UNFINISHED/ChatHub.py +219 -0
  160. webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +160 -145
  161. webscout/Provider/UNFINISHED/GizAI.py +300 -0
  162. webscout/Provider/UNFINISHED/Marcus.py +218 -0
  163. webscout/Provider/UNFINISHED/Qodo.py +481 -0
  164. webscout/Provider/UNFINISHED/XenAI.py +330 -0
  165. webscout/Provider/{Youchat.py → UNFINISHED/Youchat.py} +64 -47
  166. webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
  167. webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
  168. webscout/Provider/UNFINISHED/liner.py +342 -0
  169. webscout/Provider/UNFINISHED/liner_api_request.py +246 -0
  170. webscout/Provider/UNFINISHED/samurai.py +231 -0
  171. webscout/Provider/WiseCat.py +256 -196
  172. webscout/Provider/WrDoChat.py +390 -0
  173. webscout/Provider/__init__.py +115 -198
  174. webscout/Provider/ai4chat.py +181 -202
  175. webscout/Provider/akashgpt.py +330 -342
  176. webscout/Provider/cerebras.py +397 -242
  177. webscout/Provider/cleeai.py +236 -213
  178. webscout/Provider/elmo.py +291 -234
  179. webscout/Provider/geminiapi.py +343 -208
  180. webscout/Provider/julius.py +245 -223
  181. webscout/Provider/learnfastai.py +333 -266
  182. webscout/Provider/llama3mitril.py +230 -180
  183. webscout/Provider/llmchat.py +308 -213
  184. webscout/Provider/llmchatco.py +321 -311
  185. webscout/Provider/meta.py +996 -794
  186. webscout/Provider/oivscode.py +332 -0
  187. webscout/Provider/searchchat.py +316 -293
  188. webscout/Provider/sonus.py +264 -208
  189. webscout/Provider/toolbaz.py +359 -320
  190. webscout/Provider/turboseek.py +332 -219
  191. webscout/Provider/typefully.py +262 -280
  192. webscout/Provider/x0gpt.py +332 -256
  193. webscout/__init__.py +31 -38
  194. webscout/__main__.py +5 -5
  195. webscout/cli.py +585 -293
  196. webscout/client.py +1497 -0
  197. webscout/conversation.py +140 -565
  198. webscout/exceptions.py +383 -339
  199. webscout/litagent/__init__.py +29 -29
  200. webscout/litagent/agent.py +492 -455
  201. webscout/litagent/constants.py +60 -60
  202. webscout/models.py +505 -181
  203. webscout/optimizers.py +32 -378
  204. webscout/prompt_manager.py +376 -274
  205. webscout/sanitize.py +1514 -0
  206. webscout/scout/README.md +452 -0
  207. webscout/scout/__init__.py +8 -8
  208. webscout/scout/core/__init__.py +7 -7
  209. webscout/scout/core/crawler.py +330 -140
  210. webscout/scout/core/scout.py +800 -568
  211. webscout/scout/core/search_result.py +51 -96
  212. webscout/scout/core/text_analyzer.py +64 -63
  213. webscout/scout/core/text_utils.py +412 -277
  214. webscout/scout/core/web_analyzer.py +54 -52
  215. webscout/scout/element.py +872 -460
  216. webscout/scout/parsers/__init__.py +70 -69
  217. webscout/scout/parsers/html5lib_parser.py +182 -172
  218. webscout/scout/parsers/html_parser.py +238 -236
  219. webscout/scout/parsers/lxml_parser.py +203 -178
  220. webscout/scout/utils.py +38 -37
  221. webscout/search/__init__.py +47 -0
  222. webscout/search/base.py +201 -0
  223. webscout/search/bing_main.py +45 -0
  224. webscout/search/brave_main.py +92 -0
  225. webscout/search/duckduckgo_main.py +57 -0
  226. webscout/search/engines/__init__.py +127 -0
  227. webscout/search/engines/bing/__init__.py +15 -0
  228. webscout/search/engines/bing/base.py +35 -0
  229. webscout/search/engines/bing/images.py +114 -0
  230. webscout/search/engines/bing/news.py +96 -0
  231. webscout/search/engines/bing/suggestions.py +36 -0
  232. webscout/search/engines/bing/text.py +109 -0
  233. webscout/search/engines/brave/__init__.py +19 -0
  234. webscout/search/engines/brave/base.py +47 -0
  235. webscout/search/engines/brave/images.py +213 -0
  236. webscout/search/engines/brave/news.py +353 -0
  237. webscout/search/engines/brave/suggestions.py +318 -0
  238. webscout/search/engines/brave/text.py +167 -0
  239. webscout/search/engines/brave/videos.py +364 -0
  240. webscout/search/engines/duckduckgo/__init__.py +25 -0
  241. webscout/search/engines/duckduckgo/answers.py +80 -0
  242. webscout/search/engines/duckduckgo/base.py +189 -0
  243. webscout/search/engines/duckduckgo/images.py +100 -0
  244. webscout/search/engines/duckduckgo/maps.py +183 -0
  245. webscout/search/engines/duckduckgo/news.py +70 -0
  246. webscout/search/engines/duckduckgo/suggestions.py +22 -0
  247. webscout/search/engines/duckduckgo/text.py +221 -0
  248. webscout/search/engines/duckduckgo/translate.py +48 -0
  249. webscout/search/engines/duckduckgo/videos.py +80 -0
  250. webscout/search/engines/duckduckgo/weather.py +84 -0
  251. webscout/search/engines/mojeek.py +61 -0
  252. webscout/search/engines/wikipedia.py +77 -0
  253. webscout/search/engines/yahoo/__init__.py +41 -0
  254. webscout/search/engines/yahoo/answers.py +19 -0
  255. webscout/search/engines/yahoo/base.py +34 -0
  256. webscout/search/engines/yahoo/images.py +323 -0
  257. webscout/search/engines/yahoo/maps.py +19 -0
  258. webscout/search/engines/yahoo/news.py +258 -0
  259. webscout/search/engines/yahoo/suggestions.py +140 -0
  260. webscout/search/engines/yahoo/text.py +273 -0
  261. webscout/search/engines/yahoo/translate.py +19 -0
  262. webscout/search/engines/yahoo/videos.py +302 -0
  263. webscout/search/engines/yahoo/weather.py +220 -0
  264. webscout/search/engines/yandex.py +67 -0
  265. webscout/search/engines/yep/__init__.py +13 -0
  266. webscout/search/engines/yep/base.py +34 -0
  267. webscout/search/engines/yep/images.py +101 -0
  268. webscout/search/engines/yep/suggestions.py +38 -0
  269. webscout/search/engines/yep/text.py +99 -0
  270. webscout/search/http_client.py +172 -0
  271. webscout/search/results.py +141 -0
  272. webscout/search/yahoo_main.py +57 -0
  273. webscout/search/yep_main.py +48 -0
  274. webscout/server/__init__.py +48 -0
  275. webscout/server/config.py +78 -0
  276. webscout/server/exceptions.py +69 -0
  277. webscout/server/providers.py +286 -0
  278. webscout/server/request_models.py +131 -0
  279. webscout/server/request_processing.py +404 -0
  280. webscout/server/routes.py +642 -0
  281. webscout/server/server.py +351 -0
  282. webscout/server/ui_templates.py +1171 -0
  283. webscout/swiftcli/__init__.py +79 -809
  284. webscout/swiftcli/core/__init__.py +7 -0
  285. webscout/swiftcli/core/cli.py +574 -0
  286. webscout/swiftcli/core/context.py +98 -0
  287. webscout/swiftcli/core/group.py +268 -0
  288. webscout/swiftcli/decorators/__init__.py +28 -0
  289. webscout/swiftcli/decorators/command.py +243 -0
  290. webscout/swiftcli/decorators/options.py +247 -0
  291. webscout/swiftcli/decorators/output.py +392 -0
  292. webscout/swiftcli/exceptions.py +21 -0
  293. webscout/swiftcli/plugins/__init__.py +9 -0
  294. webscout/swiftcli/plugins/base.py +134 -0
  295. webscout/swiftcli/plugins/manager.py +269 -0
  296. webscout/swiftcli/utils/__init__.py +58 -0
  297. webscout/swiftcli/utils/formatting.py +251 -0
  298. webscout/swiftcli/utils/parsing.py +368 -0
  299. webscout/update_checker.py +280 -136
  300. webscout/utils.py +28 -14
  301. webscout/version.py +2 -1
  302. webscout/version.py.bak +3 -0
  303. webscout/zeroart/__init__.py +218 -55
  304. webscout/zeroart/base.py +70 -60
  305. webscout/zeroart/effects.py +155 -99
  306. webscout/zeroart/fonts.py +1799 -816
  307. webscout-2026.1.19.dist-info/METADATA +638 -0
  308. webscout-2026.1.19.dist-info/RECORD +312 -0
  309. {webscout-8.2.2.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
  310. webscout-2026.1.19.dist-info/entry_points.txt +4 -0
  311. webscout-2026.1.19.dist-info/top_level.txt +1 -0
  312. inferno/__init__.py +0 -6
  313. inferno/__main__.py +0 -9
  314. inferno/cli.py +0 -6
  315. webscout/DWEBS.py +0 -477
  316. webscout/Extra/autocoder/__init__.py +0 -9
  317. webscout/Extra/autocoder/autocoder.py +0 -849
  318. webscout/Extra/autocoder/autocoder_utiles.py +0 -332
  319. webscout/LLM.py +0 -442
  320. webscout/Litlogger/__init__.py +0 -67
  321. webscout/Litlogger/core/__init__.py +0 -6
  322. webscout/Litlogger/core/level.py +0 -23
  323. webscout/Litlogger/core/logger.py +0 -165
  324. webscout/Litlogger/handlers/__init__.py +0 -12
  325. webscout/Litlogger/handlers/console.py +0 -33
  326. webscout/Litlogger/handlers/file.py +0 -143
  327. webscout/Litlogger/handlers/network.py +0 -173
  328. webscout/Litlogger/styles/__init__.py +0 -7
  329. webscout/Litlogger/styles/colors.py +0 -249
  330. webscout/Litlogger/styles/formats.py +0 -458
  331. webscout/Litlogger/styles/text.py +0 -87
  332. webscout/Litlogger/utils/__init__.py +0 -6
  333. webscout/Litlogger/utils/detectors.py +0 -153
  334. webscout/Litlogger/utils/formatters.py +0 -200
  335. webscout/Local/__init__.py +0 -12
  336. webscout/Local/__main__.py +0 -9
  337. webscout/Local/api.py +0 -576
  338. webscout/Local/cli.py +0 -516
  339. webscout/Local/config.py +0 -75
  340. webscout/Local/llm.py +0 -287
  341. webscout/Local/model_manager.py +0 -253
  342. webscout/Local/server.py +0 -721
  343. webscout/Local/utils.py +0 -93
  344. webscout/Provider/AI21.py +0 -177
  345. webscout/Provider/AISEARCH/DeepFind.py +0 -250
  346. webscout/Provider/AISEARCH/ISou.py +0 -256
  347. webscout/Provider/AISEARCH/felo_search.py +0 -228
  348. webscout/Provider/AISEARCH/genspark_search.py +0 -208
  349. webscout/Provider/AISEARCH/hika_search.py +0 -194
  350. webscout/Provider/AISEARCH/scira_search.py +0 -324
  351. webscout/Provider/Aitopia.py +0 -292
  352. webscout/Provider/AllenAI.py +0 -413
  353. webscout/Provider/Blackboxai.py +0 -229
  354. webscout/Provider/C4ai.py +0 -432
  355. webscout/Provider/ChatGPTClone.py +0 -226
  356. webscout/Provider/ChatGPTES.py +0 -237
  357. webscout/Provider/ChatGPTGratis.py +0 -194
  358. webscout/Provider/Chatify.py +0 -175
  359. webscout/Provider/Cloudflare.py +0 -273
  360. webscout/Provider/DeepSeek.py +0 -196
  361. webscout/Provider/ElectronHub.py +0 -709
  362. webscout/Provider/ExaChat.py +0 -342
  363. webscout/Provider/Free2GPT.py +0 -241
  364. webscout/Provider/GPTWeb.py +0 -193
  365. webscout/Provider/Glider.py +0 -211
  366. webscout/Provider/HF_space/__init__.py +0 -0
  367. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  368. webscout/Provider/HuggingFaceChat.py +0 -462
  369. webscout/Provider/Hunyuan.py +0 -272
  370. webscout/Provider/LambdaChat.py +0 -392
  371. webscout/Provider/Llama.py +0 -200
  372. webscout/Provider/Llama3.py +0 -204
  373. webscout/Provider/Marcus.py +0 -148
  374. webscout/Provider/OLLAMA.py +0 -396
  375. webscout/Provider/OPENAI/c4ai.py +0 -367
  376. webscout/Provider/OPENAI/chatgptclone.py +0 -460
  377. webscout/Provider/OPENAI/exachat.py +0 -433
  378. webscout/Provider/OPENAI/freeaichat.py +0 -352
  379. webscout/Provider/OPENAI/opkfc.py +0 -488
  380. webscout/Provider/OPENAI/scirachat.py +0 -463
  381. webscout/Provider/OPENAI/standardinput.py +0 -425
  382. webscout/Provider/OPENAI/typegpt.py +0 -346
  383. webscout/Provider/OPENAI/uncovrAI.py +0 -455
  384. webscout/Provider/OPENAI/venice.py +0 -413
  385. webscout/Provider/OPENAI/yep.py +0 -327
  386. webscout/Provider/OpenGPT.py +0 -199
  387. webscout/Provider/Perplexitylabs.py +0 -415
  388. webscout/Provider/Phind.py +0 -535
  389. webscout/Provider/PizzaGPT.py +0 -198
  390. webscout/Provider/Reka.py +0 -214
  391. webscout/Provider/StandardInput.py +0 -278
  392. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  393. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  394. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  395. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  396. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  397. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  398. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  399. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  400. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  401. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  402. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  403. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  404. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  405. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  406. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  407. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  408. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  409. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  410. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  411. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  412. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  413. webscout/Provider/TTI/artbit/__init__.py +0 -22
  414. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  415. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  416. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  417. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  418. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  419. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  420. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  421. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  422. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  423. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  424. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  425. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  426. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  427. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  428. webscout/Provider/TTI/talkai/__init__.py +0 -4
  429. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  430. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  431. webscout/Provider/TTS/gesserit.py +0 -127
  432. webscout/Provider/TeachAnything.py +0 -187
  433. webscout/Provider/Venice.py +0 -219
  434. webscout/Provider/VercelAI.py +0 -234
  435. webscout/Provider/WebSim.py +0 -228
  436. webscout/Provider/Writecream.py +0 -211
  437. webscout/Provider/WritingMate.py +0 -197
  438. webscout/Provider/aimathgpt.py +0 -189
  439. webscout/Provider/askmyai.py +0 -158
  440. webscout/Provider/asksteve.py +0 -203
  441. webscout/Provider/bagoodex.py +0 -145
  442. webscout/Provider/chatglm.py +0 -205
  443. webscout/Provider/copilot.py +0 -428
  444. webscout/Provider/freeaichat.py +0 -271
  445. webscout/Provider/gaurish.py +0 -244
  446. webscout/Provider/geminiprorealtime.py +0 -160
  447. webscout/Provider/granite.py +0 -187
  448. webscout/Provider/hermes.py +0 -219
  449. webscout/Provider/koala.py +0 -268
  450. webscout/Provider/labyrinth.py +0 -340
  451. webscout/Provider/lepton.py +0 -194
  452. webscout/Provider/llamatutor.py +0 -192
  453. webscout/Provider/multichat.py +0 -325
  454. webscout/Provider/promptrefine.py +0 -193
  455. webscout/Provider/scira_chat.py +0 -277
  456. webscout/Provider/scnet.py +0 -187
  457. webscout/Provider/talkai.py +0 -194
  458. webscout/Provider/tutorai.py +0 -252
  459. webscout/Provider/typegpt.py +0 -232
  460. webscout/Provider/uncovr.py +0 -312
  461. webscout/Provider/yep.py +0 -376
  462. webscout/litprinter/__init__.py +0 -59
  463. webscout/scout/core.py +0 -881
  464. webscout/tempid.py +0 -128
  465. webscout/webscout_search.py +0 -1346
  466. webscout/webscout_search_async.py +0 -877
  467. webscout/yep_search.py +0 -297
  468. webscout-8.2.2.dist-info/METADATA +0 -734
  469. webscout-8.2.2.dist-info/RECORD +0 -309
  470. webscout-8.2.2.dist-info/entry_points.txt +0 -5
  471. webscout-8.2.2.dist-info/top_level.txt +0 -3
  472. webstoken/__init__.py +0 -30
  473. webstoken/classifier.py +0 -189
  474. webstoken/keywords.py +0 -216
  475. webstoken/language.py +0 -128
  476. webstoken/ner.py +0 -164
  477. webstoken/normalizer.py +0 -35
  478. webstoken/processor.py +0 -77
  479. webstoken/sentiment.py +0 -206
  480. webstoken/stemmer.py +0 -73
  481. webstoken/tagger.py +0 -60
  482. webstoken/tokenizer.py +0 -158
  483. {webscout-8.2.2.dist-info → webscout-2026.1.19.dist-info/licenses}/LICENSE.md +0 -0
webstoken/keywords.py DELETED
@@ -1,216 +0,0 @@
1
- """
2
- Keyword extraction module using statistical and graph-based approaches.
3
- """
4
-
5
- from typing import Dict, List, Set, Tuple
6
- from collections import Counter, defaultdict
7
- import math
8
- import re
9
-
10
- from .tokenizer import WordTokenizer
11
- from .normalizer import TextNormalizer
12
-
13
-
14
- class KeywordExtractor:
15
- """Keyword extraction using TF-IDF and TextRank-inspired algorithms."""
16
-
17
- def __init__(self):
18
- self.word_tokenizer = WordTokenizer()
19
- self.normalizer = TextNormalizer()
20
-
21
- # Common words to filter out beyond basic stop words
22
- self.filter_words: Set[str] = {
23
- 'would', 'could', 'should', 'said', 'also', 'may', 'might',
24
- 'must', 'need', 'shall', 'want', 'way', 'time', 'just',
25
- 'now', 'like', 'make', 'made', 'well', 'back', 'even',
26
- 'still', 'way', 'take', 'took', 'get', 'got', 'go', 'went'
27
- }
28
-
29
- def _split_into_sentences(self, text: str) -> List[str]:
30
- """Split text into sentences using simple rules."""
31
- text = re.sub(r'\s+', ' ', text)
32
- sentences = re.split(r'[.!?]+', text)
33
- return [s.strip() for s in sentences if s.strip()]
34
-
35
- def _calculate_word_scores(self, text: str) -> Dict[str, float]:
36
- """Calculate word importance scores using frequency and position."""
37
- # Normalize and tokenize text
38
- text = self.normalizer.normalize(text)
39
- sentences = self._split_into_sentences(text)
40
-
41
- word_scores: Dict[str, float] = defaultdict(float)
42
- word_positions: Dict[str, List[int]] = defaultdict(list)
43
-
44
- # Calculate word frequencies and positions
45
- for i, sentence in enumerate(sentences):
46
- words = self.word_tokenizer.tokenize(sentence)
47
- for j, word in enumerate(words):
48
- word = word.lower()
49
- if (word.isalnum() and
50
- len(word) > 2 and
51
- word not in self.filter_words and
52
- word not in self.normalizer.stop_words):
53
- word_scores[word] += 1
54
- word_positions[word].append(i)
55
-
56
- # Adjust scores based on position
57
- num_sentences = len(sentences)
58
- for word, positions in word_positions.items():
59
- # Words appearing in first or last sentences get bonus
60
- if 0 in positions:
61
- word_scores[word] *= 1.2
62
- if num_sentences - 1 in positions:
63
- word_scores[word] *= 1.1
64
-
65
- # Words appearing throughout text get bonus
66
- coverage = len(set(positions)) / num_sentences
67
- word_scores[word] *= (1 + coverage)
68
-
69
- return word_scores
70
-
71
- def _calculate_word_cooccurrence(self, text: str, window_size: int = 3) -> Dict[str, Dict[str, int]]:
72
- """Calculate word co-occurrence matrix."""
73
- # Normalize and tokenize text
74
- text = self.normalizer.normalize(text)
75
- words = self.word_tokenizer.tokenize(text)
76
-
77
- # Filter words
78
- filtered_words = [
79
- word.lower() for word in words
80
- if (word.isalnum() and
81
- len(word) > 2 and
82
- word.lower() not in self.filter_words and
83
- word.lower() not in self.normalizer.stop_words)
84
- ]
85
-
86
- # Build co-occurrence matrix
87
- cooccurrence: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
88
-
89
- for i, word in enumerate(filtered_words):
90
- for j in range(max(0, i - window_size), min(len(filtered_words), i + window_size + 1)):
91
- if i != j:
92
- cooccurrence[word][filtered_words[j]] += 1
93
- cooccurrence[filtered_words[j]][word] += 1
94
-
95
- return cooccurrence
96
-
97
- def _textrank_scores(self, cooccurrence: Dict[str, Dict[str, int]], damping: float = 0.85,
98
- iterations: int = 30) -> Dict[str, float]:
99
- """Calculate TextRank scores from co-occurrence matrix."""
100
- scores = {word: 1.0 for word in cooccurrence}
101
-
102
- for _ in range(iterations):
103
- new_scores = {}
104
- for word in scores:
105
- if not cooccurrence[word]:
106
- continue
107
-
108
- incoming_score = sum(
109
- scores[other] * cooccurrence[word][other] / sum(cooccurrence[other].values())
110
- for other in cooccurrence[word]
111
- )
112
- new_scores[word] = (1 - damping) + damping * incoming_score
113
-
114
- # Check convergence
115
- score_diff = sum(abs(new_scores[w] - scores[w]) for w in scores)
116
- scores = new_scores
117
- if score_diff < 0.0001:
118
- break
119
-
120
- return scores
121
-
122
- def extract_keywords(self, text: str, num_keywords: int = 10,
123
- use_textrank: bool = True) -> List[Tuple[str, float]]:
124
- """
125
- Extract keywords from text using combined frequency and graph-based approach.
126
-
127
- Args:
128
- text: Input text
129
- num_keywords: Number of keywords to return
130
- use_textrank: Whether to use TextRank algorithm
131
-
132
- Returns:
133
- List of (keyword, score) tuples, sorted by score
134
- """
135
- if not text:
136
- return []
137
-
138
- # Get frequency-based scores
139
- freq_scores = self._calculate_word_scores(text)
140
-
141
- if use_textrank:
142
- # Get TextRank scores
143
- cooccurrence = self._calculate_word_cooccurrence(text)
144
- textrank_scores = self._textrank_scores(cooccurrence)
145
-
146
- # Combine scores
147
- combined_scores = {
148
- word: freq_scores[word] * textrank_scores.get(word, 0)
149
- for word in freq_scores
150
- }
151
- else:
152
- combined_scores = freq_scores
153
-
154
- # Sort and return top keywords
155
- sorted_words = sorted(
156
- combined_scores.items(),
157
- key=lambda x: x[1],
158
- reverse=True
159
- )
160
-
161
- return sorted_words[:num_keywords]
162
-
163
- def extract_keyphrases(self, text: str, num_phrases: int = 5,
164
- min_words: int = 2, max_words: int = 4) -> List[Tuple[str, float]]:
165
- """
166
- Extract key phrases from text.
167
-
168
- Args:
169
- text: Input text
170
- num_phrases: Number of phrases to return
171
- min_words: Minimum words in phrase
172
- max_words: Maximum words in phrase
173
-
174
- Returns:
175
- List of (phrase, score) tuples, sorted by score
176
- """
177
- # Normalize and split into sentences
178
- text = self.normalizer.normalize(text)
179
- sentences = self._split_into_sentences(text)
180
-
181
- # Get word importance scores
182
- word_scores = self._calculate_word_scores(text)
183
-
184
- # Extract candidate phrases
185
- phrases: Dict[str, float] = {}
186
-
187
- for sentence in sentences:
188
- words = self.word_tokenizer.tokenize(sentence)
189
-
190
- # Generate phrases of different lengths
191
- for i in range(len(words)):
192
- for length in range(min_words, min(max_words + 1, len(words) - i + 1)):
193
- phrase_words = words[i:i+length]
194
-
195
- # Filter phrases
196
- if all(
197
- word.isalnum() and
198
- len(word) > 2 and
199
- word.lower() not in self.filter_words and
200
- word.lower() not in self.normalizer.stop_words
201
- for word in phrase_words
202
- ):
203
- phrase = ' '.join(phrase_words)
204
- # Score is average of word scores
205
- score = sum(word_scores.get(word.lower(), 0) for word in phrase_words)
206
- score /= len(phrase_words)
207
- phrases[phrase] = score
208
-
209
- # Sort and return top phrases
210
- sorted_phrases = sorted(
211
- phrases.items(),
212
- key=lambda x: x[1],
213
- reverse=True
214
- )
215
-
216
- return sorted_phrases[:num_phrases]
webstoken/language.py DELETED
@@ -1,128 +0,0 @@
1
- """
2
- Language detection module using character and word frequency analysis.
3
- """
4
-
5
- from typing import Dict, List, Set, Tuple
6
- from collections import Counter
7
- import re
8
-
9
-
10
class LanguageDetector:
    """Score text against per-language character, bigram and word profiles."""

    def __init__(self):
        # Every profile holds a string of letters ('chars'), a set of
        # two-character sequences ('ngrams') and a set of short words ('words').
        self.language_profiles = {
            'ENGLISH': {
                'chars': 'etaoinshrdlcumwfgypbvkjxqz',
                'ngrams': {'th', 'he', 'in', 'er', 'an', 're', 'on', 'at', 'en', 'nd',
                           'ti', 'es', 'or', 'te', 'of', 'ed', 'is', 'it', 'al', 'ar',
                           'st', 'to', 'nt', 'ng', 'se', 'ha', 'as', 'ou', 'io', 'le'},
                'words': {'the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have',
                          'i', 'it', 'for', 'not', 'on', 'with', 'he', 'as', 'you',
                          'do', 'at'}
            },
            'SPANISH': {
                'chars': 'eaosrnidlctumpbgvyqhfzjñxwk',
                'ngrams': {'de', 'en', 'el', 'la', 'os', 'es', 'as', 'ar', 'er', 'ra',
                           'al', 'an', 'do', 'or', 'ta', 'ue', 'io', 'on', 'ro', 'ad',
                           'te', 'co', 'st', 'ci', 'nt', 'to', 'lo', 'no', 'po', 'ac'},
                'words': {'de', 'la', 'que', 'el', 'en', 'y', 'a', 'los', 'se', 'del',
                          'las', 'un', 'por', 'con', 'no', 'una', 'su', 'para', 'es',
                          'al'}
            },
            'FRENCH': {
                'chars': 'esaitnrulodcpmévqfbghàjxèêyçwzùâîôûëïüœ',
                'ngrams': {'es', 'le', 'en', 'de', 'nt', 'on', 're', 'er', 'ai', 'te',
                           'la', 'an', 'ou', 'it', 'ur', 'et', 'el', 'se', 'qu', 'me',
                           'is', 'ar', 'ce', 'ns', 'us', 'ue', 'ss', 'ie', 'em', 'tr'},
                'words': {'le', 'de', 'un', 'être', 'et', 'à', 'il', 'avoir', 'ne',
                          'je', 'son', 'que', 'se', 'qui', 'ce', 'dans', 'en', 'du',
                          'elle', 'au'}
            },
            'GERMAN': {
                'chars': 'enisratdhulcgmobwfkzvüpäößjyqxéèêëàáâãåāăąćčĉċďđ',
                'ngrams': {'en', 'er', 'ch', 'de', 'ei', 'in', 'te', 'nd', 'ie', 'ge',
                           'st', 'ne', 'be', 'es', 'un', 'zu', 'an', 'ng', 'au', 'it',
                           'is', 'he', 'ht', 'se', 'ck', 'ic', 're', 'ns', 'sc', 'tz'},
                'words': {'der', 'die', 'und', 'in', 'den', 'von', 'zu', 'das', 'mit',
                          'sich', 'des', 'auf', 'für', 'ist', 'im', 'dem', 'nicht',
                          'ein', 'eine', 'als'}
            }
        }

        # Runs of word characters delimited by word boundaries.
        self.word_pattern = re.compile(r'\b\w+\b')

    @staticmethod
    def _to_frequencies(counts: Counter) -> Dict[str, float]:
        """Turn absolute counts into shares of the total (empty input gives {})."""
        denominator = sum(counts.values()) or 1
        return {item: occurrences / denominator for item, occurrences in counts.items()}

    def _extract_ngrams(self, text: str, n: int = 2) -> List[str]:
        """Return every length-``n`` character window of the lowercased text."""
        lowered = text.lower()
        last_start = len(lowered) - n
        return [lowered[start:start + n] for start in range(last_start + 1)]

    def _calculate_char_frequencies(self, text: str) -> Dict[str, float]:
        """Return the relative frequency of each alphabetic character."""
        letters = Counter(filter(str.isalpha, text.lower()))
        return self._to_frequencies(letters)

    def _calculate_ngram_frequencies(self, text: str) -> Dict[str, float]:
        """Return the relative frequency of each character bigram."""
        return self._to_frequencies(Counter(self._extract_ngrams(text)))

    def _calculate_word_frequencies(self, text: str) -> Dict[str, float]:
        """Return the relative frequency of each lowercased word."""
        found = self.word_pattern.findall(text.lower())
        return self._to_frequencies(Counter(found))

    def _calculate_similarity(self, freq1: Dict[str, float], freq2: Dict[str, float]) -> float:
        """Sum, over the keys both distributions share, the smaller of the two values."""
        shared = set(freq1) & set(freq2)
        if shared:
            return sum(min(freq1[key], freq2[key]) for key in shared)
        return 0.0

    def detect(self, text: str) -> List[Tuple[str, float]]:
        """
        Rank the known languages by how well they match ``text``.

        Returns:
            List of (language, confidence) tuples, highest confidence first;
            an empty list when ``text`` is empty
        """
        if not text:
            return []

        char_freqs = self._calculate_char_frequencies(text)
        ngram_freqs = self._calculate_ngram_frequencies(text)
        word_freqs = self._calculate_word_frequencies(text)

        def coverage(freqs, wanted):
            # Share of the text accounted for by the profile's entries.
            return sum(freqs.get(entry, 0) for entry in wanted)

        raw_scores = []
        for language, profile in self.language_profiles.items():
            char_sim = coverage(char_freqs, profile['chars'])
            ngram_sim = coverage(ngram_freqs, profile['ngrams'])
            word_sim = coverage(word_freqs, profile['words'])
            # Weighted blend: 30% characters, 40% bigrams, 30% words.
            blended = (0.3 * char_sim + 0.4 * ngram_sim + 0.3 * word_sim)
            raw_scores.append((language, blended))

        # Rescale so the confidences sum to one (all stay zero if nothing matched).
        grand_total = sum(score for _, score in raw_scores) or 1
        ranked = [(language, score / grand_total) for language, score in raw_scores]
        ranked.sort(key=lambda entry: entry[1], reverse=True)
        return ranked
webstoken/ner.py DELETED
@@ -1,164 +0,0 @@
1
- """
2
- Named Entity Recognition (NER) module for identifying and classifying named entities.
3
- """
4
-
5
- from typing import List, Tuple, Dict, Set
6
- import re
7
-
8
-
9
class NamedEntityRecognizer:
    """Rule-based Named Entity Recognition.

    Pattern-shaped entities (e-mail, URL, phone, date, time, money,
    percentage) are found with regular expressions. PERSON, ORGANIZATION and
    LOCATION are inferred from capitalisation and small keyword sets.
    """

    def __init__(self):
        # Keyword sets are lowercase; callers lowercase a word before lookup.
        self.PERSON_TITLES = {
            'mr', 'mrs', 'ms', 'miss', 'dr', 'prof', 'sir', 'madam',
            'lord', 'lady', 'president', 'ceo', 'director'
        }

        self.ORGANIZATION_SUFFIXES = {
            'inc', 'corp', 'ltd', 'llc', 'company', 'corporation',
            'associates', 'partners', 'foundation', 'institute'
        }

        self.LOCATION_INDICATORS = {
            'street', 'road', 'avenue', 'boulevard', 'lane', 'drive',
            'circle', 'square', 'park', 'bridge', 'river', 'lake',
            'mountain', 'forest', 'city', 'town', 'village', 'country'
        }

        self.DATE_MONTHS = {
            'january', 'february', 'march', 'april', 'may', 'june',
            'july', 'august', 'september', 'october', 'november', 'december'
        }

        # Compiled once per instance; keys double as entity labels.
        self.patterns = {
            'EMAIL': re.compile(r'\b[\w\.-]+@[\w\.-]+\.\w+\b'),
            # Take everything up to whitespace or a quote/angle bracket, but
            # end on a character that is not sentence punctuation, so
            # "https://example.com/page." does not swallow the full stop.
            'URL': re.compile(r'https?://[^\s<>"\']*[^\s<>"\'.,;:!?)\]}]'),
            'PHONE': re.compile(r'\+?\d{1,3}[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'),
            'DATE': re.compile(r'\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b|\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}\b'),
            'TIME': re.compile(r'\b\d{1,2}:\d{2}(?::\d{2})?(?:\s*[AaPp][Mm])?\b'),
            'MONEY': re.compile(r'\$\d+(?:,\d{3})*(?:\.\d{2})?|\d+(?:,\d{3})*(?:\.\d{2})?\s*(?:dollars|USD|EUR|GBP)'),
            # No trailing \b: '%' is a non-word character, so a boundary after
            # it would only exist when a letter or digit follows the sign.
            'PERCENTAGE': re.compile(r'\b\d+(?:\.\d+)?%')
        }

    def is_capitalized(self, word: str) -> bool:
        """Return True when ``word`` is non-empty and starts with an uppercase letter."""
        return bool(word) and word[0].isupper()

    def extract_entities(self, text: str) -> Dict[str, List[Tuple[str, str]]]:
        """
        Extract named entities from text.

        Returns:
            Dict mapping entity types to list of (text, label) tuples
        """
        entities = {
            'PERSON': [],
            'ORGANIZATION': [],
            'LOCATION': [],
            'DATE': [],
            'TIME': [],
            'MONEY': [],
            'EMAIL': [],
            'URL': [],
            'PHONE': [],
            'PERCENTAGE': []
        }

        # First find regex pattern matches
        for label, pattern in self.patterns.items():
            for match in pattern.finditer(text):
                entities[label].append((match.group(), label))

        # Process text word by word for other entities
        words = text.split()
        i = 0
        while i < len(words):
            word = words[i]
            next_word = words[i + 1] if i + 1 < len(words) else None

            # A title followed by capitalized words: the capitalized run is the name.
            if word.lower() in self.PERSON_TITLES and next_word and self.is_capitalized(next_word):
                name_parts = []
                j = i + 1
                while j < len(words) and self.is_capitalized(words[j]):
                    name_parts.append(words[j])
                    j += 1
                if name_parts:
                    entities['PERSON'].append((' '.join(name_parts), 'PERSON'))
                    i = j
                    continue

            # A capitalized word starts a candidate organization that extends
            # over following capitalized words and organization suffixes.
            if self.is_capitalized(word):
                org_parts = [word]
                j = i + 1
                while j < len(words) and (
                    self.is_capitalized(words[j]) or
                    words[j].lower() in self.ORGANIZATION_SUFFIXES
                ):
                    org_parts.append(words[j])
                    j += 1
                # Multi-word runs always qualify; a single word qualifies only
                # when a suffix occurs somewhere inside it (substring match).
                if len(org_parts) > 1 or (
                    len(org_parts) == 1 and
                    any(suff in word.lower() for suff in self.ORGANIZATION_SUFFIXES)
                ):
                    entities['ORGANIZATION'].append((' '.join(org_parts), 'ORGANIZATION'))
                    i = j
                    continue

            # A location keyword preceded by a capitalized word, e.g. "Baker street".
            if word.lower() in self.LOCATION_INDICATORS and i > 0:
                if self.is_capitalized(words[i - 1]):
                    entities['LOCATION'].append((words[i - 1] + ' ' + word, 'LOCATION'))

            i += 1

        return entities

    def tag_text(self, text: str) -> List[Tuple[str, str]]:
        """
        Tag each whitespace-separated word in text with its entity type.

        Every occurrence of an extracted entity string is labelled, not only
        the first one. Words outside any entity receive the label 'O'.

        Returns:
            List of (word, entity_type) tuples
        """
        entities = self.extract_entities(text)
        tagged = []

        # Map character positions to entity labels. Matching is
        # case-insensitive; later entity types overwrite earlier ones where
        # their spans overlap.
        # NOTE(review): positions are computed on text.lower() and applied to
        # text, which assumes lowercasing does not change the string length.
        position_labels = {}
        text_lower = text.lower()

        for entity_type, entity_list in entities.items():
            for entity_text, _ in entity_list:
                needle = entity_text.lower()
                if not needle:
                    continue
                start = text_lower.find(needle)
                while start != -1:
                    end = start + len(needle)
                    for pos in range(start, end):
                        position_labels[pos] = entity_type
                    # Continue after this hit so repeated entities are tagged too.
                    start = text_lower.find(needle, end)

        # Walk the text character by character, emitting a word at each
        # whitespace boundary with the last label seen inside that word.
        current_pos = 0
        current_word = []
        current_label = 'O'  # Outside any entity

        for char in text:
            if char.isspace():
                if current_word:
                    tagged.append((''.join(current_word), current_label))
                    current_word = []
                    current_label = 'O'
            else:
                current_word.append(char)
                if current_pos in position_labels:
                    current_label = position_labels[current_pos]
            current_pos += 1

        # Add last word if exists
        if current_word:
            tagged.append((''.join(current_word), current_label))

        return tagged
webstoken/normalizer.py DELETED
@@ -1,35 +0,0 @@
1
- """
2
- Text normalization utilities.
3
- """
4
-
5
- import re
6
- from typing import List, Set
7
-
8
-
9
class TextNormalizer:
    """Text normalization utilities."""

    # Anything other than ASCII lowercase letters, digits, whitespace and apostrophes.
    _SPECIAL_CHARS = re.compile(r"[^a-z0-9\s']")
    # An apostrophe that is not flanked by a letter or digit on both sides.
    _STRAY_APOSTROPHE = re.compile(r"(?<![a-z0-9])'|'(?![a-z0-9])")
    _WHITESPACE = re.compile(r'\s+')

    def __init__(self):
        # Lowercase stop words; lookups lowercase the token first.
        self.stop_words: Set[str] = {
            'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from',
            'has', 'he', 'in', 'is', 'it', 'its', 'of', 'on', 'that', 'the',
            'to', 'was', 'were', 'will', 'with'
        }

    def remove_stop_words(self, tokens: List[str]) -> List[str]:
        """Return ``tokens`` without stop words (compared case-insensitively)."""
        return [token for token in tokens if token.lower() not in self.stop_words]

    def normalize(self, text: str) -> str:
        """
        Lowercase ``text`` and reduce it to words separated by single spaces.

        Characters other than ASCII letters, digits, whitespace and
        apostrophes are deleted. Apostrophes are kept only inside words
        ("don't"). Whitespace is collapsed last, so gaps left by deleted
        characters never produce double spaces.

        Returns:
            The normalized string, without leading or trailing whitespace
        """
        text = text.lower()

        # Delete special characters (no replacement, so "well-known" joins up).
        text = self._SPECIAL_CHARS.sub('', text)

        # Apostrophes at a word edge, or standing alone, become separators.
        text = self._STRAY_APOSTROPHE.sub(' ', text)

        # Collapse whitespace only after the removals above.
        return self._WHITESPACE.sub(' ', text).strip()
webstoken/processor.py DELETED
@@ -1,77 +0,0 @@
1
- """
2
- Main text processing utilities combining all NLP components.
3
- """
4
-
5
- from typing import Dict, Any, List, Tuple
6
-
7
- from .tokenizer import SentenceTokenizer, WordTokenizer
8
- from .tagger import POSTagger
9
- from .stemmer import Stemmer
10
- from .normalizer import TextNormalizer
11
-
12
-
13
def process_text(text: str, normalize: bool = True, remove_stops: bool = True) -> Dict[str, Any]:
    """
    Process text using all available NLP tools.

    Sentences are split from the raw input first; normalization, when
    requested, is then applied to each sentence before word tokenization, so
    'original' always holds the sentence as it appeared in the input.

    Args:
        text (str): Input text to process
        normalize (bool): Whether to normalize each sentence before tokenizing
        remove_stops (bool): Whether to remove stop words

    Returns:
        Dict containing processed results with the following structure:
        {
            'sentences': [
                {
                    'original': str,  # Original sentence
                    'tokens': List[str],  # Word tokens
                    'pos_tags': List[Tuple[str, str]],  # (word, tag) pairs
                    'stems': List[Tuple[str, str]]  # (word, stem) pairs
                },
                ...
            ],
            'num_sentences': int,  # Number of sentences in 'sentences'
            'num_tokens': int  # Total number of tokens
        }
    """
    # Initialize tools
    sentence_tokenizer = SentenceTokenizer()
    word_tokenizer = WordTokenizer()
    pos_tagger = POSTagger()
    stemmer = Stemmer()
    normalizer = TextNormalizer()

    # Split before normalizing: TextNormalizer.normalize deletes punctuation,
    # so normalizing the whole text first leaves no sentence terminators.
    # NOTE(review): presumes SentenceTokenizer relies on punctuation — confirm.
    sentences = sentence_tokenizer.tokenize(text)

    # Process each sentence
    processed_sentences = []
    for sentence in sentences:
        working = normalizer.normalize(sentence) if normalize else sentence
        if normalize and not working:
            # Nothing but punctuation/whitespace; no tokens to report.
            continue

        # Tokenize words
        tokens = word_tokenizer.tokenize(working)

        # Remove stop words if requested
        if remove_stops:
            tokens = normalizer.remove_stop_words(tokens)

        # Get POS tags and stems
        tagged = pos_tagger.tag(tokens)
        stems = [(token, stemmer.stem(token)) for token, _ in tagged]

        processed_sentences.append({
            'original': sentence,
            'tokens': tokens,
            'pos_tags': tagged,
            'stems': stems
        })

    return {
        'sentences': processed_sentences,
        'num_sentences': len(processed_sentences),
        'num_tokens': sum(len(s['tokens']) for s in processed_sentences)
    }