webscout 6.5__py3-none-any.whl → 6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (70) hide show
  1. webscout/Extra/autocoder/autocoder_utiles.py +119 -101
  2. webscout/Extra/weather.py +5 -5
  3. webscout/Provider/AISEARCH/__init__.py +2 -0
  4. webscout/Provider/AISEARCH/ooai.py +155 -0
  5. webscout/Provider/Amigo.py +70 -85
  6. webscout/Provider/{prefind.py → Jadve.py} +72 -70
  7. webscout/Provider/Netwrck.py +239 -0
  8. webscout/Provider/Openai.py +4 -3
  9. webscout/Provider/PI.py +2 -2
  10. webscout/Provider/PizzaGPT.py +3 -3
  11. webscout/Provider/TeachAnything.py +15 -2
  12. webscout/Provider/Youchat.py +42 -8
  13. webscout/Provider/__init__.py +134 -147
  14. webscout/Provider/meta.py +1 -1
  15. webscout/Provider/multichat.py +230 -0
  16. webscout/Provider/promptrefine.py +2 -2
  17. webscout/Provider/talkai.py +10 -13
  18. webscout/Provider/turboseek.py +5 -4
  19. webscout/Provider/tutorai.py +8 -112
  20. webscout/Provider/typegpt.py +4 -5
  21. webscout/Provider/x0gpt.py +81 -9
  22. webscout/Provider/yep.py +123 -361
  23. webscout/__init__.py +10 -1
  24. webscout/cli.py +31 -39
  25. webscout/conversation.py +24 -9
  26. webscout/exceptions.py +188 -20
  27. webscout/litprinter/__init__.py +19 -123
  28. webscout/litprinter/colors.py +54 -0
  29. webscout/optimizers.py +335 -185
  30. webscout/scout/__init__.py +2 -5
  31. webscout/scout/core/__init__.py +7 -0
  32. webscout/scout/core/crawler.py +140 -0
  33. webscout/scout/core/scout.py +571 -0
  34. webscout/scout/core/search_result.py +96 -0
  35. webscout/scout/core/text_analyzer.py +63 -0
  36. webscout/scout/core/text_utils.py +277 -0
  37. webscout/scout/core/web_analyzer.py +52 -0
  38. webscout/scout/element.py +6 -5
  39. webscout/update_checker.py +117 -58
  40. webscout/version.py +1 -1
  41. webscout/webscout_search.py +1 -1
  42. webscout/zeroart/base.py +15 -16
  43. webscout/zeroart/effects.py +1 -1
  44. webscout/zeroart/fonts.py +1 -1
  45. {webscout-6.5.dist-info → webscout-6.7.dist-info}/METADATA +9 -172
  46. {webscout-6.5.dist-info → webscout-6.7.dist-info}/RECORD +63 -45
  47. {webscout-6.5.dist-info → webscout-6.7.dist-info}/entry_points.txt +1 -1
  48. webscout-6.7.dist-info/top_level.txt +2 -0
  49. webstoken/__init__.py +30 -0
  50. webstoken/classifier.py +189 -0
  51. webstoken/keywords.py +216 -0
  52. webstoken/language.py +128 -0
  53. webstoken/ner.py +164 -0
  54. webstoken/normalizer.py +35 -0
  55. webstoken/processor.py +77 -0
  56. webstoken/sentiment.py +206 -0
  57. webstoken/stemmer.py +73 -0
  58. webstoken/t.py +75 -0
  59. webstoken/tagger.py +60 -0
  60. webstoken/tokenizer.py +158 -0
  61. webscout/Provider/Perplexity.py +0 -591
  62. webscout/Provider/RoboCoders.py +0 -206
  63. webscout/Provider/genspark.py +0 -225
  64. webscout/Provider/perplexitylabs.py +0 -265
  65. webscout/Provider/twitterclone.py +0 -251
  66. webscout/Provider/upstage.py +0 -230
  67. webscout-6.5.dist-info/top_level.txt +0 -1
  68. /webscout/Provider/{felo_search.py → AISEARCH/felo_search.py} +0 -0
  69. {webscout-6.5.dist-info → webscout-6.7.dist-info}/LICENSE.md +0 -0
  70. {webscout-6.5.dist-info → webscout-6.7.dist-info}/WHEEL +0 -0
@@ -4,23 +4,23 @@ webscout/AIutel.py,sha256=gsV08e3F6aH4j7wH5V9X3N9T5Ea5MAIQq4M47vNofuI,12020
4
4
  webscout/Bard.py,sha256=CmDhKC67Ki2xA8Rkme6EC-gLmq3PhAAAGd5tcpJ0KJo,13234
5
5
  webscout/DWEBS.py,sha256=GsKbaVgcxDC5eHtNjxr6r5uk3_8NhtxVG2dqJJVGqaI,18543
6
6
  webscout/LLM.py,sha256=essCz1nakJfmoKLJFguyJnba0HR4AfY6BVU0CEGDCcQ,16336
7
- webscout/__init__.py,sha256=IMDl0kRpLtNMi1HOW6HXoBp0CevEpLPkol8GJhorMno,657
7
+ webscout/__init__.py,sha256=eMlqim4Cnc-BLcegMaTtDb2NGt3n48E331IqLx125Mk,864
8
8
  webscout/__main__.py,sha256=pBm2E3ZZiMcCH37b1YCz7qKdKdX_i_S5En6fZDeJKFw,103
9
- webscout/cli.py,sha256=bLaSGs24wxfZlK37_WPPr1jjfbv50nfbxkeSkED0jX0,12456
10
- webscout/conversation.py,sha256=fWWrBwhZztasvW7aLSLsTLpHzuTeuV4jGXPWzXukNj8,8088
11
- webscout/exceptions.py,sha256=qdyWoLV1nEPffb5vz9cu9EfBwIwZ4nmcCinqeJDBWTo,5182
12
- webscout/optimizers.py,sha256=gMZqYA7Slw7F9e_Gp4umkpMYKtdVFJBn-yDrlFYP4Wo,9168
9
+ webscout/cli.py,sha256=3AG2adVRirqKQMvXX312nswNBtRpmW9-2C16eAhfRd4,11994
10
+ webscout/conversation.py,sha256=LMZWJVnyc1P5GLAzCLKvw6UU07nD3wA44l9sKh3nu8k,8763
11
+ webscout/exceptions.py,sha256=10OLw5gFpfyGSSfAMRWT2OYCkdiEJR2byRv74kftAhc,11383
12
+ webscout/optimizers.py,sha256=ALT4SeO_s_JV6yWG7XJJ3aZIoEiTN1PIaCZJtnGdxXs,10749
13
13
  webscout/prompt_manager.py,sha256=Jc0demWN6M6QcwRp14aHZR05r_PVPOaG8PnQkO7pDZ4,9806
14
14
  webscout/tempid.py,sha256=7ZTN2eAYqUO2deSdzzhZfgDRxE65OOhGuTBD7f8bTCM,5004
15
- webscout/update_checker.py,sha256=mw4XOEhGpe68nXuo6yXIQqA_Vt5sJhY_-5iUWsiF0oo,4179
15
+ webscout/update_checker.py,sha256=mLFD_OYjtEdUvXUiNt8wSbspmkFRmNlULf5sVyVZi60,5129
16
16
  webscout/utils.py,sha256=LVW7U0XcGYqigqxV6D5YXeGMrc_mt7PnNG_YnKf9bBM,3059
17
- webscout/version.py,sha256=K8NenkGw5y4bDvKGg-xeUewcUgMDcrcEPUuhVqQbUw8,44
18
- webscout/webscout_search.py,sha256=HHdO9XLToC_9nIMM_SaTOKKfzdhrKjb7o8Zi3ZD1O7Y,44744
17
+ webscout/version.py,sha256=2LxYXNP6zbhOX47Xl9v4Dpp05K2LUel-8-Ayyzbaw-c,44
18
+ webscout/webscout_search.py,sha256=kFdcr3-2LaksTbFy9Pmrs_Gfi9XwtfxKKk5_H0hBa80,44748
19
19
  webscout/webscout_search_async.py,sha256=2-RCa9Deahhw3Bti78kXfVaX8y3Aygy4L7HeCaITk9M,14519
20
20
  webscout/Extra/__init__.py,sha256=FbDnwI3zZdoQFosA5Q2bIYpJlHUKFWiFmFKvnk9xWKY,153
21
21
  webscout/Extra/autollama.py,sha256=Mcj7YT8mYL5J7Rg_Wmi3Ppcfh1WK6UWcrbUZySetwuU,8198
22
22
  webscout/Extra/gguf.py,sha256=u_HQ00hiKCcF4GiVabUnFTXEPTmUpa2ADjBNHxtR7bw,16053
23
- webscout/Extra/weather.py,sha256=q-h5UXL2XEBEgoqvEeLRut-ThieNzH_GNfOEIeghKLM,6000
23
+ webscout/Extra/weather.py,sha256=XVi9fb7KZdUNaZjGzCHo8UIkK4v8-rWVfYuDiFbN8WY,5960
24
24
  webscout/Extra/weather_ascii.py,sha256=AsSJT6OCpg9vxzW5h7h0s0PEMq_m_ixvcc7VDtNERdw,793
25
25
  webscout/Extra/YTToolkit/YTdownloader.py,sha256=NfbukCKdyWycl9RzJBXzqSPKW6FwWe7EQxhfLf_dJj8,34793
26
26
  webscout/Extra/YTToolkit/__init__.py,sha256=Wn1K-f6OjZ4GuWvL3FTM4zlTaF3xdb4v_K60YDxKdXg,75
@@ -38,7 +38,7 @@ webscout/Extra/YTToolkit/ytapi/stream.py,sha256=7VscCVBwLx_Lz8qFOQdKCSyeAiv_A3aU
38
38
  webscout/Extra/YTToolkit/ytapi/utils.py,sha256=ENjmAjlOTLib0ANxysuSb2kgDl3NtDTvW60b18H-Rzc,1966
39
39
  webscout/Extra/YTToolkit/ytapi/video.py,sha256=OzX1XJJ4qRjti0VOlrsOC8kDvZihMVgUFTyrT-AYYms,3877
40
40
  webscout/Extra/autocoder/__init__.py,sha256=jcDJEXVde_ATffVNjV4vQMvvWH0UDW-HRz5OKNoZ7cg,218
41
- webscout/Extra/autocoder/autocoder_utiles.py,sha256=hGAnOWJISuHm0UOKSWwre9_wCqJqDGYVbvyKJ6GrnuQ,4268
41
+ webscout/Extra/autocoder/autocoder_utiles.py,sha256=sFDvbJugd4emgNPr8CQB9IhdZqz4w8TOMZddHyEjNyI,6210
42
42
  webscout/Extra/autocoder/rawdog.py,sha256=o2lMY4Fwvl3GawBzwGXuqu-5dPFzDyX3ZZL1xmQV7Ls,25806
43
43
  webscout/Extra/markdownlite/__init__.py,sha256=IJQ5SOs4nxmAdhZk8JylNdD6SsKJPZgvnzGqu6v4qWc,32339
44
44
  webscout/Litlogger/__init__.py,sha256=Q5FPfGKktfFyvqa6kMbye9LLzuy6VXGOgE3Q-uBGD2E,23928
@@ -51,7 +51,7 @@ webscout/Local/thread.py,sha256=ctnoJtkOpKbpdlqCjsnyM2sI-3_AKg9PzNwS3QAIifQ,2360
51
51
  webscout/Local/ui.py,sha256=FhqBcC7SVHwRfvrnTduoa7gpQNWYzrSc7OGm89ErNPY,15064
52
52
  webscout/Local/utils.py,sha256=kYXfR9h4JFojkiKQhF1XHclqX0vB0qPKOkfYKjrgwuc,13216
53
53
  webscout/Provider/AI21.py,sha256=JBh-xnspxTZNMcl-Gd0Cgseqht9gTM64TUv9I4Imc9k,6218
54
- webscout/Provider/Amigo.py,sha256=bs5M-HzfKIcdCfS_50JMsX5DpS0eBWwtHNyOrTR7lAo,12304
54
+ webscout/Provider/Amigo.py,sha256=LkZuvFUwkmdzgMhUEMYvXKpNpLstjznRMe-CQydYO7g,11637
55
55
  webscout/Provider/Andi.py,sha256=-vqXWlJcHzhfZ-iW4qJy9yxzctPKp2RdmdW02hf4XLs,8863
56
56
  webscout/Provider/Bing.py,sha256=zxIzq7dlqaLskx9LsYppbMJuwfGtYid3Uh0gIhZ0hps,9001
57
57
  webscout/Provider/Blackboxai.py,sha256=KntNTWHHEmXegKFxm79_2tX_FLQF-_7xssrB6T0nCgM,8841
@@ -69,23 +69,23 @@ webscout/Provider/Free2GPT.py,sha256=Lf8dd9iacry_GRr6CT86Cjcp-oNzLyw1O_TWIPDFDxk
69
69
  webscout/Provider/GPTWeb.py,sha256=xh_mnBko6RDyz2v3KhMux-yIipipkZfVVVoUJWkbzcQ,7466
70
70
  webscout/Provider/Gemini.py,sha256=Vg2MLqQ_qxHkcN7Zikife1dyVK-y94ik8y6MAu-VzUI,7801
71
71
  webscout/Provider/Groq.py,sha256=iqyewnxWwN7fMG-dqAR_SyUqImfyZS880lO5iaXso9c,28636
72
+ webscout/Provider/Jadve.py,sha256=_8qlC0oWjGlJAUL6yH__cd6NSGkT0mcLdEZHixFlp-M,9394
72
73
  webscout/Provider/Koboldai.py,sha256=gpRgyDe4OQWwNqT7MWnNrJx4dnFmCW23KUx0Ezjgchk,15185
73
74
  webscout/Provider/Llama.py,sha256=N01p3ZVD1HgRnNNxhjRhBVD4m_qiextdyF1KDfJlqbE,7703
74
75
  webscout/Provider/Llama3.py,sha256=fU1iyKflFHDeSqa32M6UE2JtADZB0B7rcG5HYj5MWSQ,7581
75
76
  webscout/Provider/Marcus.py,sha256=6LvBYnAcbwVYiXoQ8ZrprefJ6zS2KdveiEZauClNtSE,5045
77
+ webscout/Provider/Netwrck.py,sha256=TtW-W4oSGwf8JiMBXN6EBttQMA0tMaYvuYnRs0YNI68,9482
76
78
  webscout/Provider/NinjaChat.py,sha256=tMja5xItus6WoKJm_fhILLoGyPdplikqr-nxtuUziNU,8617
77
79
  webscout/Provider/OLLAMA.py,sha256=RQXJt-PJYnA15_IXhUy4mM9qwm4PcBMfINaZm2KG6zE,7018
78
- webscout/Provider/Openai.py,sha256=32uxZmZOovzshMQmqDcJ39If7N_UW4B3EeYmaxP_GwE,19983
79
- webscout/Provider/PI.py,sha256=dMUpd1hLg72W7rnsza3k9QHBLmAi3GHSPRCkzmH-Ags,12860
80
- webscout/Provider/Perplexity.py,sha256=BC4ZbVAjumNhw4_wNbs1Y4lwshEpJF5lanTmSqkXVG8,20476
80
+ webscout/Provider/Openai.py,sha256=mpJ9VgIyvW6uo0-jIcx5Qcz4PIUAj3xVLOgKrB1z9pU,20074
81
+ webscout/Provider/PI.py,sha256=Z2yXXCv84AUbvNNaBQHkJ-Hgp-oc8sPfcvWytYwISVc,12781
81
82
  webscout/Provider/Phind.py,sha256=NA_b3B4h-kutX6wdoEg4THPfZggl2UeXPbramzZ6oiU,19297
82
- webscout/Provider/PizzaGPT.py,sha256=tEuEJAGbv-mTd479i3EgaqHd4NwgkrmMW0fpSsGm_N0,7207
83
+ webscout/Provider/PizzaGPT.py,sha256=GkyvWvs_aIcXSpM-j-CfH7Jsft2vU1IUxXmyZ-Z23kI,7120
83
84
  webscout/Provider/RUBIKSAI.py,sha256=fHCOAFWGXIESA05Bmuqtx6ZChnWomHOMwndfTLv1Hg8,8517
84
85
  webscout/Provider/Reka.py,sha256=dWw4vX91nJhAn-X1SXK72gttRaTqWNGUBFaeRJobTJg,8519
85
- webscout/Provider/RoboCoders.py,sha256=vH1B0cKzQW6fU1TWwpUlnuQFW3uirzIWj0WIpiPal1s,7957
86
- webscout/Provider/TeachAnything.py,sha256=u5HxlVdOnRWXzA-cRSRhhdnvdsfafJ_4wr9qKIuW45s,6743
87
- webscout/Provider/Youchat.py,sha256=G6I4TqjZuX3refFF3SkOgl_YXY57hjsMg_wRjPgqWIE,9014
88
- webscout/Provider/__init__.py,sha256=lPzcVlyGtXFb9WJWOv0-jLdHRNXqIANpjqx-8SAGTYU,3191
86
+ webscout/Provider/TeachAnything.py,sha256=6scp1tzcc5z_TKUALArQ4APCNElHJ7SzsJoiB71o0V0,7169
87
+ webscout/Provider/Youchat.py,sha256=hwm16gmyvMrRseFHl7nrcaHzGlY8BUx3y0KD-YQjTnc,10082
88
+ webscout/Provider/__init__.py,sha256=rns4O8C6Thft04CCq9OgAGl1GE8n-ofsvjcyt8CfGfc,2715
89
89
  webscout/Provider/ai4chat.py,sha256=av96iS4QPt9IzhcswowmDY2F8IUSLl1YVHZ4bAbfO-s,8140
90
90
  webscout/Provider/aimathgpt.py,sha256=BdXNxEHQP11p6m0wl2Q-uben46A6lMKOg89utV1S7aI,7320
91
91
  webscout/Provider/askmyai.py,sha256=XDxLQfIztI-jwergalUfNiMOwNEw62jGTN4yTlar8Po,5895
@@ -93,11 +93,9 @@ webscout/Provider/bagoodex.py,sha256=OdYIyvypX-Vkok9zAr6F5NwFQx_z0bKhiw8U4tTFw5o
93
93
  webscout/Provider/cerebras.py,sha256=cyUIgGdVMAmBgTifwuPW-XGC2SKGL2CbTKxY8al9ji4,7986
94
94
  webscout/Provider/cleeai.py,sha256=tXs_XuYZ2ZGmSOSTS0yJ59qbvFCJusE7yZkIqALwUbE,8064
95
95
  webscout/Provider/elmo.py,sha256=6C-j8xLfgyMjmv8jdjey0BPzl_UwEFpMEbBtYobfPd0,9387
96
- webscout/Provider/felo_search.py,sha256=q_Mc4SgSnkCm5vu-CMmn9kVO3k0YCZqq_uMUrVUXInU,6888
97
96
  webscout/Provider/gaurish.py,sha256=yKV4MvOvjMEsXY2fQXDc0YHyFBtW2sh-pA4YNdY--wE,8472
98
97
  webscout/Provider/geminiapi.py,sha256=c2zvwqkRgscI8vU1FU4qb_4fPe374LIQJ_uHNM9lmF8,8297
99
98
  webscout/Provider/geminiprorealtime.py,sha256=unQRqHkDt_gkZ_Ls0qBb73d6YTtqYwd1uhYW0RrXKI0,5845
100
- webscout/Provider/genspark.py,sha256=zKg0nWNzi38Mai_ShBISnmRSpfrBXAIZcTYyf8iX9h8,8764
101
99
  webscout/Provider/julius.py,sha256=unD3UUnW8-Bie4o0__Vn7cXfqnaVvlE3MWI-LYGb5VE,8576
102
100
  webscout/Provider/koala.py,sha256=qBtqjTvhoMQdDE8qUH0XuNa_x2bic77d7CUjIVboask,10106
103
101
  webscout/Provider/learnfastai.py,sha256=xANWSHF3e6kTKgwbAHYdjNWIwYvr5Bin0NDENmJvruE,9763
@@ -105,19 +103,19 @@ webscout/Provider/lepton.py,sha256=4RiQ4YNJljX558yhSUqws6pf1Yhf7pWIesa4SRQCry8,7
105
103
  webscout/Provider/llama3mitril.py,sha256=3Ur3GMkmSSTmyxJh1u9fF0xlZ7ssFF6Jxuoi587N1lw,6501
106
104
  webscout/Provider/llamatutor.py,sha256=DijA4Y1CVDz-Ks8pACTSb6hnOxdQD2IHw-_ztRqhyPQ,8871
107
105
  webscout/Provider/llmchat.py,sha256=gr7ewEPWWIfjhmpQXTOZJxQffol8MA1yWAIXdG3VZZo,7944
108
- webscout/Provider/meta.py,sha256=N1Ia1iDADfk1QJUHb10Vq5UvObiybQsi5QlUoNVaY-M,30456
106
+ webscout/Provider/meta.py,sha256=3LkGoiGtorc28aURqIpdbMdrDt7QfdARpl1Fp7XNSIg,30460
109
107
  webscout/Provider/mhystical.py,sha256=W3j28XOnVGlRPCSYrjA5N7okYEdTVeOw41HdocfvzqE,6555
110
- webscout/Provider/perplexitylabs.py,sha256=3_qnAoh0nsp6uhqFqzwUyXBezAzmRFs4YSRxVIokYF8,9651
111
- webscout/Provider/prefind.py,sha256=msw_kXO59Xz-xG5f72O3WIww-HVyUAj-WpByeLiCB1U,9306
112
- webscout/Provider/promptrefine.py,sha256=NbRdvCsiiLIDwU39wzxch5yGwmLvbauljAl9vC73nR4,7714
113
- webscout/Provider/talkai.py,sha256=UZNiuQm5IPX_VFPOIVc1SJNPTKAcbvaOVO9fsVDVXNY,7434
114
- webscout/Provider/turboseek.py,sha256=yDOZcS3UqHcas_pH1AcB-XLD6C2FRzZxLp8rJOhUZGQ,8521
115
- webscout/Provider/tutorai.py,sha256=ov-gRPAh22n8v7TWjeLdPnuHgCcaAsdpc2IfxfuSIEk,14716
116
- webscout/Provider/twitterclone.py,sha256=SsLFZ-PansLdhw0HHBrqZtrjI4ItiHvoHfQBroXHORw,9509
117
- webscout/Provider/typegpt.py,sha256=6_i06EYXlJyqIMlZ_NwG4w9D96C6yfIMvAfHW-R9P6Q,12732
118
- webscout/Provider/upstage.py,sha256=rdH94hIwR98HKfar576WzVmgdB1DU2H8qaChUmJFtPc,9237
119
- webscout/Provider/x0gpt.py,sha256=Z8U4MQIRfhHpdarHO6_BZ27veXMDEAneguJ7uFSD_HU,6478
120
- webscout/Provider/yep.py,sha256=ge7a3cK02G6tbT0_q9glH7ujCx3QlgrqwBuHasQVYfQ,20581
108
+ webscout/Provider/multichat.py,sha256=c1TFCGv3Ds06mO5eYl_6YmUjGkg_dkqh2weQpYYsc08,8415
109
+ webscout/Provider/promptrefine.py,sha256=W0Ka59yRRKCOBXfF_hF-iF30F0AR45OPxgCCZ6mZzuA,7722
110
+ webscout/Provider/talkai.py,sha256=FHBZzBdHrOVn41nkhhJmjhHuR1NKTOBE7lGgYDV-0dk,7598
111
+ webscout/Provider/turboseek.py,sha256=uEoL5eJFvnpDCymTufrC8O9Ni3i8acb0GBy4JweFeIE,8474
112
+ webscout/Provider/tutorai.py,sha256=qBwo53bNtCumFonF43m0RCOQx6dU6t869Cnhw3H6qSM,11204
113
+ webscout/Provider/typegpt.py,sha256=TMNu1h8pyiTqynJoZXi150TjGzGkyZzYGvFKiXnZM3g,12700
114
+ webscout/Provider/x0gpt.py,sha256=eKDEbUIkY1Cc8mCad-CFA4ZgBXOmR7T7rKf89lh16-8,9383
115
+ webscout/Provider/yep.py,sha256=uor5RXrEjdoTsEvAROnAa6cZnW2Zlpm382AoltGkvRg,9926
116
+ webscout/Provider/AISEARCH/__init__.py,sha256=V9Su6olCAu7JUy1NK_ia198_7YOFfYvUb6u0XGvLQRg,47
117
+ webscout/Provider/AISEARCH/felo_search.py,sha256=q_Mc4SgSnkCm5vu-CMmn9kVO3k0YCZqq_uMUrVUXInU,6888
118
+ webscout/Provider/AISEARCH/ooai.py,sha256=Ubi8lruI4vBcxQQUO3_qufqOs-7qH34hGNOSk47vuFU,5722
121
119
  webscout/Provider/TTI/__init__.py,sha256=9Hu_y-z6Ev2SQL5-IF_B4UDetoOl7hVclka1RatQ6_M,225
122
120
  webscout/Provider/TTI/AiForce/__init__.py,sha256=Ukeas_Ny6VQBwCDEWuBMlWbeTki82ir1MtZdR2vNJPc,663
123
121
  webscout/Provider/TTI/AiForce/async_aiforce.py,sha256=7Gpgu__TPNLZwxayqfOOkRcKpIdAfE1x8wE-RoylEAY,9647
@@ -156,24 +154,44 @@ webscout/Provider/TTS/streamElements.py,sha256=0cfayE1eQYQj4ILF9tmHdodIcw9WknTKs
156
154
  webscout/Provider/TTS/utils.py,sha256=-2sXcGG1lDBIr32fssI1Tf9yxl7gMWZRM0xzUFebeko,10599
157
155
  webscout/Provider/TTS/voicepod.py,sha256=cMF7k88cP-RMqkqcoT8cu1o-eehxd2tqiq8laJeuMJw,4227
158
156
  webscout/litagent/__init__.py,sha256=V-hXEmMp3UH1qKmJDtL0j6_mazmbyyRrIqKqt__oIRw,6091
159
- webscout/litprinter/__init__.py,sha256=eMGF91G3l8qMHDwwCnL-8-fGGGMLTSsdd9GfIRviJCA,31565
160
- webscout/scout/__init__.py,sha256=fhN3P47AmrK2dINYc_lXqPbrITtMO5Gnnqs5PmoZqLY,241
157
+ webscout/litprinter/__init__.py,sha256=3Hdtj0-YSdwNqxc_Uc_yZIv7gnsap4e4dvqni4CA9oY,28821
158
+ webscout/litprinter/colors.py,sha256=5L_WmYFcp2JoX1rCS4GvFCNCOlj2EhEJRGWvqvs8M54,1247
159
+ webscout/scout/__init__.py,sha256=C-uYGqVR7iiScetSxUTHc76i0OLQnWJO7WFTfhgafW4,325
161
160
  webscout/scout/core.py,sha256=Joiw1RTPse2VQOjF5y6Uksa_ixsUSIO58RLWnyx6gbU,28761
162
- webscout/scout/element.py,sha256=PZ-Hd5eyhUtqfPNY5wAPm-gNLOdPD3_y0H94mFrqBw4,14787
161
+ webscout/scout/element.py,sha256=DakYPD49kIqXI358XEKeJSQTW802h4B2bP2z_FE5M3Q,14861
163
162
  webscout/scout/utils.py,sha256=a9QJnsJ6LyYRRrjJYGUrvFfkBsynTcIgXR86t4tgaM4,979
163
+ webscout/scout/core/__init__.py,sha256=ncF9MkM3iO1B2Ew_nOwzLQeeJ1xkCdoJ77CCZADD974,296
164
+ webscout/scout/core/crawler.py,sha256=VNUSEhJx8gLzYDgK0aXKYqXNLnbI-OFroLUlnQ9WYEw,5144
165
+ webscout/scout/core/scout.py,sha256=LZcC7bjis8BWqSwhm6Q6wKaMvrDEnzbqa91wlfeqpG4,19300
166
+ webscout/scout/core/search_result.py,sha256=rx0lz0qd6Rvaj79885yKQQXR_0cQrPtoJdzVX5J0msw,2962
167
+ webscout/scout/core/text_analyzer.py,sha256=gm30VZAnAH5DPgt6OhYd4bOriK-sSwgtEBqrx4NWm6o,1846
168
+ webscout/scout/core/text_utils.py,sha256=N44y2zeUt6MSUf3jlG4TCUdOF9DgkcL4UILdWrpRUeQ,10823
169
+ webscout/scout/core/web_analyzer.py,sha256=iPlt1GIU-NlUR-ccIXguLYQgE1IGu4S2kjEhT87ofZc,1674
164
170
  webscout/scout/parsers/__init__.py,sha256=a0gysttcAnIxfY8UBbkNJdmMHEhfeI9VmfnQGZVYKPk,1762
165
171
  webscout/scout/parsers/html5lib_parser.py,sha256=VRCQIDh7Z9XbS8-zSOsmRtR2Oyx_DEz85hjRvabBQ2Y,5627
166
172
  webscout/scout/parsers/html_parser.py,sha256=pHCKl-n_9uNt2ld09nY6hiyFuCi4azT7Be8Rzlo1GQQ,6820
167
173
  webscout/scout/parsers/lxml_parser.py,sha256=fDaxFuBSlU9x5hH5nDj5BHd72r9XHZ24Z5lt6FYPt_8,6053
168
174
  webscout/swiftcli/__init__.py,sha256=Yr_bsL1E3FaUh-xqWK_2ecG91IC2mSiP7eBa3en0mHc,27758
169
175
  webscout/zeroart/__init__.py,sha256=WKB5gM0VY-trL0rWmMwYiStFyoMGe9ivx0DQ-IJe0W8,1308
170
- webscout/zeroart/base.py,sha256=7Z5I5qFm11g5R8PktltDze8zF2VAIXswNrb5Z-jlRvw,1821
171
- webscout/zeroart/effects.py,sha256=_UWmnxlTlf1gnT90nAXmgj-0j7_GAK7yX5KTNj34728,2932
172
- webscout/zeroart/fonts.py,sha256=nBwIMxuXTJiZq2C654tPEiHt0KwyuO6iYnuBjQIf_O0,24842
176
+ webscout/zeroart/base.py,sha256=X7bjkpD9iaP6IS6gHS7A12j8jRHbxzpJZb3ZJyhdxD0,1923
177
+ webscout/zeroart/effects.py,sha256=aSuoT68sOBvzuCM54mOu8LO6VhKaIq6SbYpcVdF1Jm0,2933
178
+ webscout/zeroart/fonts.py,sha256=VrmHowbkIvjqnanI6MbUxNji2cYhwzEaXgm-DxXec3k,24843
173
179
  webscout/zerodir/__init__.py,sha256=NHDON7WoN26G80uRXxPHH3jeTQhvz25Zx0yA7qq81fw,8396
174
- webscout-6.5.dist-info/LICENSE.md,sha256=5mkWS6cgjGxJClmN7n--h0beF3uFAOV_Ngr1YTK33Tk,9203
175
- webscout-6.5.dist-info/METADATA,sha256=phd5n2QatIQPBHY2hnEiY3z7LTiq0kkfSiRA-6PBDlI,46144
176
- webscout-6.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
177
- webscout-6.5.dist-info/entry_points.txt,sha256=Hh4YIIjvkqB9SVxZ2ri4DZUkgEu_WF_5_r_nZDIvfG8,73
178
- webscout-6.5.dist-info/top_level.txt,sha256=nYIw7OKBQDr_Z33IzZUKidRD3zQEo8jOJYkMVMeN334,9
179
- webscout-6.5.dist-info/RECORD,,
180
+ webstoken/__init__.py,sha256=I38YnMUFGT30I-6754-tIUmFnTZnWoVA8AP1-FpD-fU,774
181
+ webstoken/classifier.py,sha256=CApGnXyPbaS87SN9_MFb4XTh24UTKdaXekR6JcWAFJo,7463
182
+ webstoken/keywords.py,sha256=6PSukt3omZWe5T3Gt_GAW1ypol2Oylk2AwfXrLCLVUU,8262
183
+ webstoken/language.py,sha256=mRAIujNI27urD5XHYIixLdBGazpQYDMmErw0gKb4ivU,5936
184
+ webstoken/ner.py,sha256=mOon6O9XGs0gsI2VidLxLrL3qTW6lwqzLEa3a1OwVyM,6075
185
+ webstoken/normalizer.py,sha256=-raK5_VMyyeXxEYF7panghYejeQkhKJqxqdhDCX1zwI,1110
186
+ webstoken/processor.py,sha256=szBNJNFjAjSOHoH4Q2h_MugE_AOnj-QoZwS83sn68Gg,2369
187
+ webstoken/sentiment.py,sha256=Bivwm9Wmo2GMXvKYxPxd1vrnyDwDywyjxmOIe6jP6NM,7996
188
+ webstoken/stemmer.py,sha256=AYg1frOaS2CWF-KvFwh3_s-VMZUa0olM7CN1UaEpc-8,2551
189
+ webstoken/t.py,sha256=jrgacr0xK8Xbc9BZNtMknZv2xQagg9eMO3MXwCvIIFE,2558
190
+ webstoken/tagger.py,sha256=RgDxPw0E6VgeXTrAFnnOb4X2J2Hu3snafr-MJeWtHlc,2246
191
+ webstoken/tokenizer.py,sha256=RAaihP3Yq4OFHcXrTNUGBDLbq1-ti_lVUEw0CIPPCww,5858
192
+ webscout-6.7.dist-info/LICENSE.md,sha256=5mkWS6cgjGxJClmN7n--h0beF3uFAOV_Ngr1YTK33Tk,9203
193
+ webscout-6.7.dist-info/METADATA,sha256=NaQoy4zNP_3FPsgqqCkP1FzMoy5KsUS9vb5TjSP38Sc,40718
194
+ webscout-6.7.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
195
+ webscout-6.7.dist-info/entry_points.txt,sha256=7thMsVUoHiXGoIH1NeoocKpxlszWflNsNyrnDqGzvO0,70
196
+ webscout-6.7.dist-info/top_level.txt,sha256=KQtbgkA3gxcsADB0hIIx-heydmEYXpAY7xn3LjwDx0E,19
197
+ webscout-6.7.dist-info/RECORD,,
@@ -1,3 +1,3 @@
1
1
  [console_scripts]
2
2
  WEBS = webscout.cli:cli
3
- webscout = webscout.webai:main
3
+ webscout = webscout.cli:cli
@@ -0,0 +1,2 @@
1
+ webscout
2
+ webstoken
webstoken/__init__.py ADDED
@@ -0,0 +1,30 @@
1
+ """
2
+ Webstoken - A pure Python NLP toolkit for text processing
3
+ """
4
+
5
+ from .tokenizer import SentenceTokenizer, WordTokenizer
6
+ from .tagger import POSTagger
7
+ from .stemmer import Stemmer
8
+ from .normalizer import TextNormalizer
9
+ from .processor import process_text
10
+ from .ner import NamedEntityRecognizer
11
+ from .classifier import TextClassifier, TopicClassifier
12
+ from .language import LanguageDetector
13
+ from .sentiment import SentimentAnalyzer
14
+ from .keywords import KeywordExtractor
15
+
16
+ __version__ = '0.1.0'
17
+ __all__ = [
18
+ 'SentenceTokenizer',
19
+ 'WordTokenizer',
20
+ 'POSTagger',
21
+ 'Stemmer',
22
+ 'TextNormalizer',
23
+ 'process_text',
24
+ 'NamedEntityRecognizer',
25
+ 'TextClassifier',
26
+ 'TopicClassifier',
27
+ 'LanguageDetector',
28
+ 'SentimentAnalyzer',
29
+ 'KeywordExtractor'
30
+ ]
@@ -0,0 +1,189 @@
1
+ """
2
+ Text classification module using rule-based and statistical approaches.
3
+ """
4
+
5
+ from typing import Dict, List, Set, Tuple
6
+ from collections import Counter
7
+ import math
8
+ import re
9
+
10
+ from .normalizer import TextNormalizer
11
+ from .tokenizer import WordTokenizer
12
+
13
+
14
+ class TextClassifier:
15
+ """Simple text classifier using TF-IDF and cosine similarity."""
16
+
17
+ def __init__(self):
18
+ self.word_tokenizer = WordTokenizer()
19
+ self.normalizer = TextNormalizer()
20
+ self.documents: Dict[str, List[str]] = {} # category -> list of documents
21
+ self.vocabulary: Set[str] = set()
22
+ self.idf_scores: Dict[str, float] = {}
23
+ self.category_vectors: Dict[str, Dict[str, float]] = {}
24
+
25
+ def train(self, documents: Dict[str, List[str]]) -> None:
26
+ """
27
+ Train the classifier on labeled documents.
28
+
29
+ Args:
30
+ documents: Dict mapping categories to lists of documents
31
+ """
32
+ self.documents = documents
33
+
34
+ # Build vocabulary and document frequencies
35
+ doc_frequencies: Dict[str, int] = Counter()
36
+ total_docs = sum(len(docs) for docs in documents.values())
37
+
38
+ for category, docs in documents.items():
39
+ for doc in docs:
40
+ # Normalize and tokenize
41
+ doc = self.normalizer.normalize(doc)
42
+ tokens = self.word_tokenizer.tokenize(doc)
43
+
44
+ # Update vocabulary and document frequencies
45
+ unique_tokens = set(tokens)
46
+ self.vocabulary.update(unique_tokens)
47
+ doc_frequencies.update(unique_tokens)
48
+
49
+ # Calculate IDF scores
50
+ self.idf_scores = {
51
+ word: math.log(total_docs / (freq + 1))
52
+ for word, freq in doc_frequencies.items()
53
+ }
54
+
55
+ # Calculate TF-IDF vectors for each category
56
+ for category, docs in documents.items():
57
+ category_vector: Dict[str, float] = {word: 0.0 for word in self.vocabulary}
58
+
59
+ for doc in docs:
60
+ # Get term frequencies
61
+ doc = self.normalizer.normalize(doc)
62
+ tokens = self.word_tokenizer.tokenize(doc)
63
+ term_freqs = Counter(tokens)
64
+
65
+ # Update category vector with TF-IDF scores
66
+ for word, tf in term_freqs.items():
67
+ if word in self.idf_scores:
68
+ category_vector[word] += tf * self.idf_scores[word]
69
+
70
+ # Average the scores
71
+ for word in category_vector:
72
+ category_vector[word] /= len(docs)
73
+
74
+ self.category_vectors[category] = category_vector
75
+
76
+ def _calculate_vector(self, text: str) -> Dict[str, float]:
77
+ """Calculate TF-IDF vector for input text."""
78
+ # Normalize and tokenize
79
+ text = self.normalizer.normalize(text)
80
+ tokens = self.word_tokenizer.tokenize(text)
81
+ term_freqs = Counter(tokens)
82
+
83
+ # Calculate TF-IDF scores
84
+ vector = {word: 0.0 for word in self.vocabulary}
85
+ for word, tf in term_freqs.items():
86
+ if word in self.idf_scores:
87
+ vector[word] = tf * self.idf_scores[word]
88
+
89
+ return vector
90
+
91
+ def _cosine_similarity(self, vec1: Dict[str, float], vec2: Dict[str, float]) -> float:
92
+ """Calculate cosine similarity between two vectors."""
93
+ dot_product = sum(vec1[word] * vec2[word] for word in vec1)
94
+ norm1 = math.sqrt(sum(score * score for score in vec1.values()))
95
+ norm2 = math.sqrt(sum(score * score for score in vec2.values()))
96
+
97
+ if norm1 == 0 or norm2 == 0:
98
+ return 0.0
99
+ return dot_product / (norm1 * norm2)
100
+
101
+ def classify(self, text: str) -> List[Tuple[str, float]]:
102
+ """
103
+ Classify text into categories with confidence scores.
104
+
105
+ Returns:
106
+ List of (category, confidence) tuples, sorted by confidence
107
+ """
108
+ if not self.category_vectors:
109
+ raise ValueError("Classifier must be trained before classification")
110
+
111
+ # Calculate vector for input text
112
+ text_vector = self._calculate_vector(text)
113
+
114
+ # Calculate similarity with each category
115
+ similarities = [
116
+ (category, self._cosine_similarity(text_vector, category_vec))
117
+ for category, category_vec in self.category_vectors.items()
118
+ ]
119
+
120
+ # Sort by similarity score
121
+ return sorted(similarities, key=lambda x: x[1], reverse=True)
122
+
123
+
124
+ class TopicClassifier:
125
+ """Rule-based topic classifier using keyword matching."""
126
+
127
+ def __init__(self):
128
+ # Define topic keywords
129
+ self.topic_keywords = {
130
+ 'TECHNOLOGY': {
131
+ 'computer', 'software', 'hardware', 'internet', 'programming',
132
+ 'digital', 'data', 'algorithm', 'code', 'web', 'app', 'mobile',
133
+ 'cyber', 'robot', 'ai', 'artificial intelligence', 'machine learning'
134
+ },
135
+ 'SCIENCE': {
136
+ 'research', 'experiment', 'laboratory', 'scientific', 'physics',
137
+ 'chemistry', 'biology', 'mathematics', 'theory', 'hypothesis',
138
+ 'study', 'discovery', 'innovation', 'analysis', 'observation'
139
+ },
140
+ 'BUSINESS': {
141
+ 'company', 'market', 'finance', 'investment', 'stock', 'trade',
142
+ 'economy', 'business', 'corporate', 'startup', 'entrepreneur',
143
+ 'profit', 'revenue', 'management', 'strategy', 'commercial'
144
+ },
145
+ 'POLITICS': {
146
+ 'government', 'policy', 'election', 'political', 'democracy',
147
+ 'parliament', 'congress', 'law', 'legislation', 'party',
148
+ 'vote', 'campaign', 'president', 'minister', 'diplomatic'
149
+ },
150
+ 'SPORTS': {
151
+ 'game', 'team', 'player', 'competition', 'tournament',
152
+ 'championship', 'score', 'match', 'athlete', 'sport',
153
+ 'win', 'lose', 'victory', 'defeat', 'coach', 'training'
154
+ },
155
+ 'ENTERTAINMENT': {
156
+ 'movie', 'film', 'music', 'song', 'concert', 'actor',
157
+ 'actress', 'celebrity', 'show', 'performance', 'art',
158
+ 'entertainment', 'theater', 'dance', 'festival', 'media'
159
+ }
160
+ }
161
+
162
+ # Compile regex patterns for each topic
163
+ self.topic_patterns = {
164
+ topic: re.compile(r'\b(' + '|'.join(re.escape(kw) for kw in keywords) + r')\b', re.IGNORECASE)
165
+ for topic, keywords in self.topic_keywords.items()
166
+ }
167
+
168
+ def classify(self, text: str) -> List[Tuple[str, float]]:
169
+ """
170
+ Classify text into topics with confidence scores.
171
+
172
+ Returns:
173
+ List of (topic, confidence) tuples, sorted by confidence
174
+ """
175
+ # Count keyword matches for each topic
176
+ topic_matches = {
177
+ topic: len(pattern.findall(text))
178
+ for topic, pattern in self.topic_patterns.items()
179
+ }
180
+
181
+ # Calculate confidence scores
182
+ total_matches = sum(topic_matches.values()) or 1 # Avoid division by zero
183
+ topic_scores = [
184
+ (topic, count / total_matches)
185
+ for topic, count in topic_matches.items()
186
+ ]
187
+
188
+ # Sort by score
189
+ return sorted(topic_scores, key=lambda x: x[1], reverse=True)
webstoken/keywords.py ADDED
@@ -0,0 +1,216 @@
1
+ """
2
+ Keyword extraction module using statistical and graph-based approaches.
3
+ """
4
+
5
+ from typing import Dict, List, Set, Tuple
6
+ from collections import Counter, defaultdict
7
+ import math
8
+ import re
9
+
10
+ from .tokenizer import WordTokenizer
11
+ from .normalizer import TextNormalizer
12
+
13
+
14
+ class KeywordExtractor:
15
+ """Keyword extraction using TF-IDF and TextRank-inspired algorithms."""
16
+
17
+ def __init__(self):
18
+ self.word_tokenizer = WordTokenizer()
19
+ self.normalizer = TextNormalizer()
20
+
21
+ # Common words to filter out beyond basic stop words
22
+ self.filter_words: Set[str] = {
23
+ 'would', 'could', 'should', 'said', 'also', 'may', 'might',
24
+ 'must', 'need', 'shall', 'want', 'way', 'time', 'just',
25
+ 'now', 'like', 'make', 'made', 'well', 'back', 'even',
26
+ 'still', 'way', 'take', 'took', 'get', 'got', 'go', 'went'
27
+ }
28
+
29
+ def _split_into_sentences(self, text: str) -> List[str]:
30
+ """Split text into sentences using simple rules."""
31
+ text = re.sub(r'\s+', ' ', text)
32
+ sentences = re.split(r'[.!?]+', text)
33
+ return [s.strip() for s in sentences if s.strip()]
34
+
35
+ def _calculate_word_scores(self, text: str) -> Dict[str, float]:
36
+ """Calculate word importance scores using frequency and position."""
37
+ # Normalize and tokenize text
38
+ text = self.normalizer.normalize(text)
39
+ sentences = self._split_into_sentences(text)
40
+
41
+ word_scores: Dict[str, float] = defaultdict(float)
42
+ word_positions: Dict[str, List[int]] = defaultdict(list)
43
+
44
+ # Calculate word frequencies and positions
45
+ for i, sentence in enumerate(sentences):
46
+ words = self.word_tokenizer.tokenize(sentence)
47
+ for j, word in enumerate(words):
48
+ word = word.lower()
49
+ if (word.isalnum() and
50
+ len(word) > 2 and
51
+ word not in self.filter_words and
52
+ word not in self.normalizer.stop_words):
53
+ word_scores[word] += 1
54
+ word_positions[word].append(i)
55
+
56
+ # Adjust scores based on position
57
+ num_sentences = len(sentences)
58
+ for word, positions in word_positions.items():
59
+ # Words appearing in first or last sentences get bonus
60
+ if 0 in positions:
61
+ word_scores[word] *= 1.2
62
+ if num_sentences - 1 in positions:
63
+ word_scores[word] *= 1.1
64
+
65
+ # Words appearing throughout text get bonus
66
+ coverage = len(set(positions)) / num_sentences
67
+ word_scores[word] *= (1 + coverage)
68
+
69
+ return word_scores
70
+
71
+ def _calculate_word_cooccurrence(self, text: str, window_size: int = 3) -> Dict[str, Dict[str, int]]:
72
+ """Calculate word co-occurrence matrix."""
73
+ # Normalize and tokenize text
74
+ text = self.normalizer.normalize(text)
75
+ words = self.word_tokenizer.tokenize(text)
76
+
77
+ # Filter words
78
+ filtered_words = [
79
+ word.lower() for word in words
80
+ if (word.isalnum() and
81
+ len(word) > 2 and
82
+ word.lower() not in self.filter_words and
83
+ word.lower() not in self.normalizer.stop_words)
84
+ ]
85
+
86
+ # Build co-occurrence matrix
87
+ cooccurrence: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
88
+
89
+ for i, word in enumerate(filtered_words):
90
+ for j in range(max(0, i - window_size), min(len(filtered_words), i + window_size + 1)):
91
+ if i != j:
92
+ cooccurrence[word][filtered_words[j]] += 1
93
+ cooccurrence[filtered_words[j]][word] += 1
94
+
95
+ return cooccurrence
96
+
97
+ def _textrank_scores(self, cooccurrence: Dict[str, Dict[str, int]], damping: float = 0.85,
98
+ iterations: int = 30) -> Dict[str, float]:
99
+ """Calculate TextRank scores from co-occurrence matrix."""
100
+ scores = {word: 1.0 for word in cooccurrence}
101
+
102
+ for _ in range(iterations):
103
+ new_scores = {}
104
+ for word in scores:
105
+ if not cooccurrence[word]:
106
+ continue
107
+
108
+ incoming_score = sum(
109
+ scores[other] * cooccurrence[word][other] / sum(cooccurrence[other].values())
110
+ for other in cooccurrence[word]
111
+ )
112
+ new_scores[word] = (1 - damping) + damping * incoming_score
113
+
114
+ # Check convergence
115
+ score_diff = sum(abs(new_scores[w] - scores[w]) for w in scores)
116
+ scores = new_scores
117
+ if score_diff < 0.0001:
118
+ break
119
+
120
+ return scores
121
+
122
+ def extract_keywords(self, text: str, num_keywords: int = 10,
123
+ use_textrank: bool = True) -> List[Tuple[str, float]]:
124
+ """
125
+ Extract keywords from text using combined frequency and graph-based approach.
126
+
127
+ Args:
128
+ text: Input text
129
+ num_keywords: Number of keywords to return
130
+ use_textrank: Whether to use TextRank algorithm
131
+
132
+ Returns:
133
+ List of (keyword, score) tuples, sorted by score
134
+ """
135
+ if not text:
136
+ return []
137
+
138
+ # Get frequency-based scores
139
+ freq_scores = self._calculate_word_scores(text)
140
+
141
+ if use_textrank:
142
+ # Get TextRank scores
143
+ cooccurrence = self._calculate_word_cooccurrence(text)
144
+ textrank_scores = self._textrank_scores(cooccurrence)
145
+
146
+ # Combine scores
147
+ combined_scores = {
148
+ word: freq_scores[word] * textrank_scores.get(word, 0)
149
+ for word in freq_scores
150
+ }
151
+ else:
152
+ combined_scores = freq_scores
153
+
154
+ # Sort and return top keywords
155
+ sorted_words = sorted(
156
+ combined_scores.items(),
157
+ key=lambda x: x[1],
158
+ reverse=True
159
+ )
160
+
161
+ return sorted_words[:num_keywords]
162
+
163
+ def extract_keyphrases(self, text: str, num_phrases: int = 5,
164
+ min_words: int = 2, max_words: int = 4) -> List[Tuple[str, float]]:
165
+ """
166
+ Extract key phrases from text.
167
+
168
+ Args:
169
+ text: Input text
170
+ num_phrases: Number of phrases to return
171
+ min_words: Minimum words in phrase
172
+ max_words: Maximum words in phrase
173
+
174
+ Returns:
175
+ List of (phrase, score) tuples, sorted by score
176
+ """
177
+ # Normalize and split into sentences
178
+ text = self.normalizer.normalize(text)
179
+ sentences = self._split_into_sentences(text)
180
+
181
+ # Get word importance scores
182
+ word_scores = self._calculate_word_scores(text)
183
+
184
+ # Extract candidate phrases
185
+ phrases: Dict[str, float] = {}
186
+
187
+ for sentence in sentences:
188
+ words = self.word_tokenizer.tokenize(sentence)
189
+
190
+ # Generate phrases of different lengths
191
+ for i in range(len(words)):
192
+ for length in range(min_words, min(max_words + 1, len(words) - i + 1)):
193
+ phrase_words = words[i:i+length]
194
+
195
+ # Filter phrases
196
+ if all(
197
+ word.isalnum() and
198
+ len(word) > 2 and
199
+ word.lower() not in self.filter_words and
200
+ word.lower() not in self.normalizer.stop_words
201
+ for word in phrase_words
202
+ ):
203
+ phrase = ' '.join(phrase_words)
204
+ # Score is average of word scores
205
+ score = sum(word_scores.get(word.lower(), 0) for word in phrase_words)
206
+ score /= len(phrase_words)
207
+ phrases[phrase] = score
208
+
209
+ # Sort and return top phrases
210
+ sorted_phrases = sorted(
211
+ phrases.items(),
212
+ key=lambda x: x[1],
213
+ reverse=True
214
+ )
215
+
216
+ return sorted_phrases[:num_phrases]