webscout-7.1-py3-none-any.whl → webscout-7.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (154)
  1. webscout/AIauto.py +191 -191
  2. webscout/AIbase.py +122 -122
  3. webscout/AIutel.py +440 -440
  4. webscout/Bard.py +343 -161
  5. webscout/DWEBS.py +489 -492
  6. webscout/Extra/YTToolkit/YTdownloader.py +995 -995
  7. webscout/Extra/YTToolkit/__init__.py +2 -2
  8. webscout/Extra/YTToolkit/transcriber.py +476 -479
  9. webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
  10. webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
  11. webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
  12. webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
  13. webscout/Extra/YTToolkit/ytapi/video.py +103 -103
  14. webscout/Extra/autocoder/__init__.py +9 -9
  15. webscout/Extra/autocoder/autocoder_utiles.py +199 -199
  16. webscout/Extra/autocoder/rawdog.py +5 -7
  17. webscout/Extra/autollama.py +230 -230
  18. webscout/Extra/gguf.py +3 -3
  19. webscout/Extra/weather.py +171 -171
  20. webscout/LLM.py +442 -442
  21. webscout/Litlogger/__init__.py +67 -681
  22. webscout/Litlogger/core/__init__.py +6 -0
  23. webscout/Litlogger/core/level.py +23 -0
  24. webscout/Litlogger/core/logger.py +166 -0
  25. webscout/Litlogger/handlers/__init__.py +12 -0
  26. webscout/Litlogger/handlers/console.py +33 -0
  27. webscout/Litlogger/handlers/file.py +143 -0
  28. webscout/Litlogger/handlers/network.py +173 -0
  29. webscout/Litlogger/styles/__init__.py +7 -0
  30. webscout/Litlogger/styles/colors.py +249 -0
  31. webscout/Litlogger/styles/formats.py +460 -0
  32. webscout/Litlogger/styles/text.py +87 -0
  33. webscout/Litlogger/utils/__init__.py +6 -0
  34. webscout/Litlogger/utils/detectors.py +154 -0
  35. webscout/Litlogger/utils/formatters.py +200 -0
  36. webscout/Provider/AISEARCH/DeepFind.py +250 -250
  37. webscout/Provider/AISEARCH/ISou.py +277 -0
  38. webscout/Provider/AISEARCH/__init__.py +2 -1
  39. webscout/Provider/Blackboxai.py +3 -3
  40. webscout/Provider/ChatGPTGratis.py +226 -0
  41. webscout/Provider/Cloudflare.py +3 -4
  42. webscout/Provider/DeepSeek.py +218 -0
  43. webscout/Provider/Deepinfra.py +40 -24
  44. webscout/Provider/Free2GPT.py +131 -124
  45. webscout/Provider/Gemini.py +100 -115
  46. webscout/Provider/Glider.py +3 -3
  47. webscout/Provider/Groq.py +5 -1
  48. webscout/Provider/Jadve.py +3 -3
  49. webscout/Provider/Marcus.py +191 -192
  50. webscout/Provider/Netwrck.py +3 -3
  51. webscout/Provider/PI.py +2 -2
  52. webscout/Provider/PizzaGPT.py +2 -3
  53. webscout/Provider/QwenLM.py +311 -0
  54. webscout/Provider/TTI/AiForce/__init__.py +22 -22
  55. webscout/Provider/TTI/AiForce/async_aiforce.py +257 -257
  56. webscout/Provider/TTI/AiForce/sync_aiforce.py +242 -242
  57. webscout/Provider/TTI/FreeAIPlayground/__init__.py +9 -0
  58. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +206 -0
  59. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +192 -0
  60. webscout/Provider/TTI/Nexra/__init__.py +22 -22
  61. webscout/Provider/TTI/Nexra/async_nexra.py +286 -286
  62. webscout/Provider/TTI/Nexra/sync_nexra.py +258 -258
  63. webscout/Provider/TTI/PollinationsAI/__init__.py +23 -23
  64. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +330 -330
  65. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +285 -285
  66. webscout/Provider/TTI/__init__.py +2 -1
  67. webscout/Provider/TTI/artbit/__init__.py +22 -22
  68. webscout/Provider/TTI/artbit/async_artbit.py +184 -184
  69. webscout/Provider/TTI/artbit/sync_artbit.py +176 -176
  70. webscout/Provider/TTI/blackbox/__init__.py +4 -4
  71. webscout/Provider/TTI/blackbox/async_blackbox.py +212 -212
  72. webscout/Provider/TTI/blackbox/sync_blackbox.py +199 -199
  73. webscout/Provider/TTI/deepinfra/__init__.py +4 -4
  74. webscout/Provider/TTI/deepinfra/async_deepinfra.py +227 -227
  75. webscout/Provider/TTI/deepinfra/sync_deepinfra.py +199 -199
  76. webscout/Provider/TTI/huggingface/__init__.py +22 -22
  77. webscout/Provider/TTI/huggingface/async_huggingface.py +199 -199
  78. webscout/Provider/TTI/huggingface/sync_huggingface.py +195 -195
  79. webscout/Provider/TTI/imgninza/__init__.py +4 -4
  80. webscout/Provider/TTI/imgninza/async_ninza.py +214 -214
  81. webscout/Provider/TTI/imgninza/sync_ninza.py +209 -209
  82. webscout/Provider/TTI/talkai/__init__.py +4 -4
  83. webscout/Provider/TTI/talkai/async_talkai.py +229 -229
  84. webscout/Provider/TTI/talkai/sync_talkai.py +207 -207
  85. webscout/Provider/TTS/deepgram.py +182 -182
  86. webscout/Provider/TTS/elevenlabs.py +136 -136
  87. webscout/Provider/TTS/gesserit.py +150 -150
  88. webscout/Provider/TTS/murfai.py +138 -138
  89. webscout/Provider/TTS/parler.py +133 -134
  90. webscout/Provider/TTS/streamElements.py +360 -360
  91. webscout/Provider/TTS/utils.py +280 -280
  92. webscout/Provider/TTS/voicepod.py +116 -116
  93. webscout/Provider/TextPollinationsAI.py +28 -8
  94. webscout/Provider/WiseCat.py +193 -0
  95. webscout/Provider/__init__.py +146 -134
  96. webscout/Provider/cerebras.py +242 -227
  97. webscout/Provider/chatglm.py +204 -204
  98. webscout/Provider/dgaf.py +2 -3
  99. webscout/Provider/freeaichat.py +221 -0
  100. webscout/Provider/gaurish.py +2 -3
  101. webscout/Provider/geminiapi.py +208 -208
  102. webscout/Provider/granite.py +223 -0
  103. webscout/Provider/hermes.py +218 -218
  104. webscout/Provider/llama3mitril.py +179 -179
  105. webscout/Provider/llamatutor.py +3 -3
  106. webscout/Provider/llmchat.py +2 -3
  107. webscout/Provider/meta.py +794 -794
  108. webscout/Provider/multichat.py +331 -331
  109. webscout/Provider/typegpt.py +359 -359
  110. webscout/Provider/yep.py +3 -3
  111. webscout/__init__.py +1 -0
  112. webscout/__main__.py +5 -5
  113. webscout/cli.py +319 -319
  114. webscout/conversation.py +241 -242
  115. webscout/exceptions.py +328 -328
  116. webscout/litagent/__init__.py +28 -28
  117. webscout/litagent/agent.py +2 -3
  118. webscout/litprinter/__init__.py +0 -58
  119. webscout/scout/__init__.py +8 -8
  120. webscout/scout/core.py +884 -884
  121. webscout/scout/element.py +459 -459
  122. webscout/scout/parsers/__init__.py +69 -69
  123. webscout/scout/parsers/html5lib_parser.py +172 -172
  124. webscout/scout/parsers/html_parser.py +236 -236
  125. webscout/scout/parsers/lxml_parser.py +178 -178
  126. webscout/scout/utils.py +38 -38
  127. webscout/swiftcli/__init__.py +811 -811
  128. webscout/update_checker.py +2 -12
  129. webscout/version.py +1 -1
  130. webscout/webscout_search.py +87 -6
  131. webscout/webscout_search_async.py +58 -1
  132. webscout/yep_search.py +297 -0
  133. webscout/zeroart/__init__.py +54 -54
  134. webscout/zeroart/base.py +60 -60
  135. webscout/zeroart/effects.py +99 -99
  136. webscout/zeroart/fonts.py +816 -816
  137. {webscout-7.1.dist-info → webscout-7.3.dist-info}/METADATA +62 -22
  138. webscout-7.3.dist-info/RECORD +223 -0
  139. {webscout-7.1.dist-info → webscout-7.3.dist-info}/WHEEL +1 -1
  140. webstoken/__init__.py +30 -30
  141. webstoken/classifier.py +189 -189
  142. webstoken/keywords.py +216 -216
  143. webstoken/language.py +128 -128
  144. webstoken/ner.py +164 -164
  145. webstoken/normalizer.py +35 -35
  146. webstoken/processor.py +77 -77
  147. webstoken/sentiment.py +206 -206
  148. webstoken/stemmer.py +73 -73
  149. webstoken/tagger.py +60 -60
  150. webstoken/tokenizer.py +158 -158
  151. webscout-7.1.dist-info/RECORD +0 -198
  152. {webscout-7.1.dist-info → webscout-7.3.dist-info}/LICENSE.md +0 -0
  153. {webscout-7.1.dist-info → webscout-7.3.dist-info}/entry_points.txt +0 -0
  154. {webscout-7.1.dist-info → webscout-7.3.dist-info}/top_level.txt +0 -0
webstoken/classifier.py CHANGED
@@ -1,189 +1,189 @@
1
- """
2
- Text classification module using rule-based and statistical approaches.
3
- """
4
-
5
- from typing import Dict, List, Set, Tuple
6
- from collections import Counter
7
- import math
8
- import re
9
-
10
- from .normalizer import TextNormalizer
11
- from .tokenizer import WordTokenizer
12
-
13
-
14
- class TextClassifier:
15
- """Simple text classifier using TF-IDF and cosine similarity."""
16
-
17
- def __init__(self):
18
- self.word_tokenizer = WordTokenizer()
19
- self.normalizer = TextNormalizer()
20
- self.documents: Dict[str, List[str]] = {} # category -> list of documents
21
- self.vocabulary: Set[str] = set()
22
- self.idf_scores: Dict[str, float] = {}
23
- self.category_vectors: Dict[str, Dict[str, float]] = {}
24
-
25
- def train(self, documents: Dict[str, List[str]]) -> None:
26
- """
27
- Train the classifier on labeled documents.
28
-
29
- Args:
30
- documents: Dict mapping categories to lists of documents
31
- """
32
- self.documents = documents
33
-
34
- # Build vocabulary and document frequencies
35
- doc_frequencies: Dict[str, int] = Counter()
36
- total_docs = sum(len(docs) for docs in documents.values())
37
-
38
- for category, docs in documents.items():
39
- for doc in docs:
40
- # Normalize and tokenize
41
- doc = self.normalizer.normalize(doc)
42
- tokens = self.word_tokenizer.tokenize(doc)
43
-
44
- # Update vocabulary and document frequencies
45
- unique_tokens = set(tokens)
46
- self.vocabulary.update(unique_tokens)
47
- doc_frequencies.update(unique_tokens)
48
-
49
- # Calculate IDF scores
50
- self.idf_scores = {
51
- word: math.log(total_docs / (freq + 1))
52
- for word, freq in doc_frequencies.items()
53
- }
54
-
55
- # Calculate TF-IDF vectors for each category
56
- for category, docs in documents.items():
57
- category_vector: Dict[str, float] = {word: 0.0 for word in self.vocabulary}
58
-
59
- for doc in docs:
60
- # Get term frequencies
61
- doc = self.normalizer.normalize(doc)
62
- tokens = self.word_tokenizer.tokenize(doc)
63
- term_freqs = Counter(tokens)
64
-
65
- # Update category vector with TF-IDF scores
66
- for word, tf in term_freqs.items():
67
- if word in self.idf_scores:
68
- category_vector[word] += tf * self.idf_scores[word]
69
-
70
- # Average the scores
71
- for word in category_vector:
72
- category_vector[word] /= len(docs)
73
-
74
- self.category_vectors[category] = category_vector
75
-
76
- def _calculate_vector(self, text: str) -> Dict[str, float]:
77
- """Calculate TF-IDF vector for input text."""
78
- # Normalize and tokenize
79
- text = self.normalizer.normalize(text)
80
- tokens = self.word_tokenizer.tokenize(text)
81
- term_freqs = Counter(tokens)
82
-
83
- # Calculate TF-IDF scores
84
- vector = {word: 0.0 for word in self.vocabulary}
85
- for word, tf in term_freqs.items():
86
- if word in self.idf_scores:
87
- vector[word] = tf * self.idf_scores[word]
88
-
89
- return vector
90
-
91
- def _cosine_similarity(self, vec1: Dict[str, float], vec2: Dict[str, float]) -> float:
92
- """Calculate cosine similarity between two vectors."""
93
- dot_product = sum(vec1[word] * vec2[word] for word in vec1)
94
- norm1 = math.sqrt(sum(score * score for score in vec1.values()))
95
- norm2 = math.sqrt(sum(score * score for score in vec2.values()))
96
-
97
- if norm1 == 0 or norm2 == 0:
98
- return 0.0
99
- return dot_product / (norm1 * norm2)
100
-
101
- def classify(self, text: str) -> List[Tuple[str, float]]:
102
- """
103
- Classify text into categories with confidence scores.
104
-
105
- Returns:
106
- List of (category, confidence) tuples, sorted by confidence
107
- """
108
- if not self.category_vectors:
109
- raise ValueError("Classifier must be trained before classification")
110
-
111
- # Calculate vector for input text
112
- text_vector = self._calculate_vector(text)
113
-
114
- # Calculate similarity with each category
115
- similarities = [
116
- (category, self._cosine_similarity(text_vector, category_vec))
117
- for category, category_vec in self.category_vectors.items()
118
- ]
119
-
120
- # Sort by similarity score
121
- return sorted(similarities, key=lambda x: x[1], reverse=True)
122
-
123
-
124
- class TopicClassifier:
125
- """Rule-based topic classifier using keyword matching."""
126
-
127
- def __init__(self):
128
- # Define topic keywords
129
- self.topic_keywords = {
130
- 'TECHNOLOGY': {
131
- 'computer', 'software', 'hardware', 'internet', 'programming',
132
- 'digital', 'data', 'algorithm', 'code', 'web', 'app', 'mobile',
133
- 'cyber', 'robot', 'ai', 'artificial intelligence', 'machine learning'
134
- },
135
- 'SCIENCE': {
136
- 'research', 'experiment', 'laboratory', 'scientific', 'physics',
137
- 'chemistry', 'biology', 'mathematics', 'theory', 'hypothesis',
138
- 'study', 'discovery', 'innovation', 'analysis', 'observation'
139
- },
140
- 'BUSINESS': {
141
- 'company', 'market', 'finance', 'investment', 'stock', 'trade',
142
- 'economy', 'business', 'corporate', 'startup', 'entrepreneur',
143
- 'profit', 'revenue', 'management', 'strategy', 'commercial'
144
- },
145
- 'POLITICS': {
146
- 'government', 'policy', 'election', 'political', 'democracy',
147
- 'parliament', 'congress', 'law', 'legislation', 'party',
148
- 'vote', 'campaign', 'president', 'minister', 'diplomatic'
149
- },
150
- 'SPORTS': {
151
- 'game', 'team', 'player', 'competition', 'tournament',
152
- 'championship', 'score', 'match', 'athlete', 'sport',
153
- 'win', 'lose', 'victory', 'defeat', 'coach', 'training'
154
- },
155
- 'ENTERTAINMENT': {
156
- 'movie', 'film', 'music', 'song', 'concert', 'actor',
157
- 'actress', 'celebrity', 'show', 'performance', 'art',
158
- 'entertainment', 'theater', 'dance', 'festival', 'media'
159
- }
160
- }
161
-
162
- # Compile regex patterns for each topic
163
- self.topic_patterns = {
164
- topic: re.compile(r'\b(' + '|'.join(re.escape(kw) for kw in keywords) + r')\b', re.IGNORECASE)
165
- for topic, keywords in self.topic_keywords.items()
166
- }
167
-
168
- def classify(self, text: str) -> List[Tuple[str, float]]:
169
- """
170
- Classify text into topics with confidence scores.
171
-
172
- Returns:
173
- List of (topic, confidence) tuples, sorted by confidence
174
- """
175
- # Count keyword matches for each topic
176
- topic_matches = {
177
- topic: len(pattern.findall(text))
178
- for topic, pattern in self.topic_patterns.items()
179
- }
180
-
181
- # Calculate confidence scores
182
- total_matches = sum(topic_matches.values()) or 1 # Avoid division by zero
183
- topic_scores = [
184
- (topic, count / total_matches)
185
- for topic, count in topic_matches.items()
186
- ]
187
-
188
- # Sort by score
189
- return sorted(topic_scores, key=lambda x: x[1], reverse=True)
1
+ """
2
+ Text classification module using rule-based and statistical approaches.
3
+ """
4
+
5
+ from typing import Dict, List, Set, Tuple
6
+ from collections import Counter
7
+ import math
8
+ import re
9
+
10
+ from .normalizer import TextNormalizer
11
+ from .tokenizer import WordTokenizer
12
+
13
+
14
+ class TextClassifier:
15
+ """Simple text classifier using TF-IDF and cosine similarity."""
16
+
17
+ def __init__(self):
18
+ self.word_tokenizer = WordTokenizer()
19
+ self.normalizer = TextNormalizer()
20
+ self.documents: Dict[str, List[str]] = {} # category -> list of documents
21
+ self.vocabulary: Set[str] = set()
22
+ self.idf_scores: Dict[str, float] = {}
23
+ self.category_vectors: Dict[str, Dict[str, float]] = {}
24
+
25
+ def train(self, documents: Dict[str, List[str]]) -> None:
26
+ """
27
+ Train the classifier on labeled documents.
28
+
29
+ Args:
30
+ documents: Dict mapping categories to lists of documents
31
+ """
32
+ self.documents = documents
33
+
34
+ # Build vocabulary and document frequencies
35
+ doc_frequencies: Dict[str, int] = Counter()
36
+ total_docs = sum(len(docs) for docs in documents.values())
37
+
38
+ for category, docs in documents.items():
39
+ for doc in docs:
40
+ # Normalize and tokenize
41
+ doc = self.normalizer.normalize(doc)
42
+ tokens = self.word_tokenizer.tokenize(doc)
43
+
44
+ # Update vocabulary and document frequencies
45
+ unique_tokens = set(tokens)
46
+ self.vocabulary.update(unique_tokens)
47
+ doc_frequencies.update(unique_tokens)
48
+
49
+ # Calculate IDF scores
50
+ self.idf_scores = {
51
+ word: math.log(total_docs / (freq + 1))
52
+ for word, freq in doc_frequencies.items()
53
+ }
54
+
55
+ # Calculate TF-IDF vectors for each category
56
+ for category, docs in documents.items():
57
+ category_vector: Dict[str, float] = {word: 0.0 for word in self.vocabulary}
58
+
59
+ for doc in docs:
60
+ # Get term frequencies
61
+ doc = self.normalizer.normalize(doc)
62
+ tokens = self.word_tokenizer.tokenize(doc)
63
+ term_freqs = Counter(tokens)
64
+
65
+ # Update category vector with TF-IDF scores
66
+ for word, tf in term_freqs.items():
67
+ if word in self.idf_scores:
68
+ category_vector[word] += tf * self.idf_scores[word]
69
+
70
+ # Average the scores
71
+ for word in category_vector:
72
+ category_vector[word] /= len(docs)
73
+
74
+ self.category_vectors[category] = category_vector
75
+
76
+ def _calculate_vector(self, text: str) -> Dict[str, float]:
77
+ """Calculate TF-IDF vector for input text."""
78
+ # Normalize and tokenize
79
+ text = self.normalizer.normalize(text)
80
+ tokens = self.word_tokenizer.tokenize(text)
81
+ term_freqs = Counter(tokens)
82
+
83
+ # Calculate TF-IDF scores
84
+ vector = {word: 0.0 for word in self.vocabulary}
85
+ for word, tf in term_freqs.items():
86
+ if word in self.idf_scores:
87
+ vector[word] = tf * self.idf_scores[word]
88
+
89
+ return vector
90
+
91
+ def _cosine_similarity(self, vec1: Dict[str, float], vec2: Dict[str, float]) -> float:
92
+ """Calculate cosine similarity between two vectors."""
93
+ dot_product = sum(vec1[word] * vec2[word] for word in vec1)
94
+ norm1 = math.sqrt(sum(score * score for score in vec1.values()))
95
+ norm2 = math.sqrt(sum(score * score for score in vec2.values()))
96
+
97
+ if norm1 == 0 or norm2 == 0:
98
+ return 0.0
99
+ return dot_product / (norm1 * norm2)
100
+
101
+ def classify(self, text: str) -> List[Tuple[str, float]]:
102
+ """
103
+ Classify text into categories with confidence scores.
104
+
105
+ Returns:
106
+ List of (category, confidence) tuples, sorted by confidence
107
+ """
108
+ if not self.category_vectors:
109
+ raise ValueError("Classifier must be trained before classification")
110
+
111
+ # Calculate vector for input text
112
+ text_vector = self._calculate_vector(text)
113
+
114
+ # Calculate similarity with each category
115
+ similarities = [
116
+ (category, self._cosine_similarity(text_vector, category_vec))
117
+ for category, category_vec in self.category_vectors.items()
118
+ ]
119
+
120
+ # Sort by similarity score
121
+ return sorted(similarities, key=lambda x: x[1], reverse=True)
122
+
123
+
124
+ class TopicClassifier:
125
+ """Rule-based topic classifier using keyword matching."""
126
+
127
+ def __init__(self):
128
+ # Define topic keywords
129
+ self.topic_keywords = {
130
+ 'TECHNOLOGY': {
131
+ 'computer', 'software', 'hardware', 'internet', 'programming',
132
+ 'digital', 'data', 'algorithm', 'code', 'web', 'app', 'mobile',
133
+ 'cyber', 'robot', 'ai', 'artificial intelligence', 'machine learning'
134
+ },
135
+ 'SCIENCE': {
136
+ 'research', 'experiment', 'laboratory', 'scientific', 'physics',
137
+ 'chemistry', 'biology', 'mathematics', 'theory', 'hypothesis',
138
+ 'study', 'discovery', 'innovation', 'analysis', 'observation'
139
+ },
140
+ 'BUSINESS': {
141
+ 'company', 'market', 'finance', 'investment', 'stock', 'trade',
142
+ 'economy', 'business', 'corporate', 'startup', 'entrepreneur',
143
+ 'profit', 'revenue', 'management', 'strategy', 'commercial'
144
+ },
145
+ 'POLITICS': {
146
+ 'government', 'policy', 'election', 'political', 'democracy',
147
+ 'parliament', 'congress', 'law', 'legislation', 'party',
148
+ 'vote', 'campaign', 'president', 'minister', 'diplomatic'
149
+ },
150
+ 'SPORTS': {
151
+ 'game', 'team', 'player', 'competition', 'tournament',
152
+ 'championship', 'score', 'match', 'athlete', 'sport',
153
+ 'win', 'lose', 'victory', 'defeat', 'coach', 'training'
154
+ },
155
+ 'ENTERTAINMENT': {
156
+ 'movie', 'film', 'music', 'song', 'concert', 'actor',
157
+ 'actress', 'celebrity', 'show', 'performance', 'art',
158
+ 'entertainment', 'theater', 'dance', 'festival', 'media'
159
+ }
160
+ }
161
+
162
+ # Compile regex patterns for each topic
163
+ self.topic_patterns = {
164
+ topic: re.compile(r'\b(' + '|'.join(re.escape(kw) for kw in keywords) + r')\b', re.IGNORECASE)
165
+ for topic, keywords in self.topic_keywords.items()
166
+ }
167
+
168
+ def classify(self, text: str) -> List[Tuple[str, float]]:
169
+ """
170
+ Classify text into topics with confidence scores.
171
+
172
+ Returns:
173
+ List of (topic, confidence) tuples, sorted by confidence
174
+ """
175
+ # Count keyword matches for each topic
176
+ topic_matches = {
177
+ topic: len(pattern.findall(text))
178
+ for topic, pattern in self.topic_patterns.items()
179
+ }
180
+
181
+ # Calculate confidence scores
182
+ total_matches = sum(topic_matches.values()) or 1 # Avoid division by zero
183
+ topic_scores = [
184
+ (topic, count / total_matches)
185
+ for topic, count in topic_matches.items()
186
+ ]
187
+
188
+ # Sort by score
189
+ return sorted(topic_scores, key=lambda x: x[1], reverse=True)