speaksy 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- speaksy/core.py +39 -37
- {speaksy-0.1.0.dist-info → speaksy-0.1.1.dist-info}/METADATA +1 -1
- {speaksy-0.1.0.dist-info → speaksy-0.1.1.dist-info}/RECORD +6 -6
- {speaksy-0.1.0.dist-info → speaksy-0.1.1.dist-info}/WHEEL +0 -0
- {speaksy-0.1.0.dist-info → speaksy-0.1.1.dist-info}/entry_points.txt +0 -0
- {speaksy-0.1.0.dist-info → speaksy-0.1.1.dist-info}/licenses/LICENSE +0 -0
speaksy/core.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import io
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
+
import re
|
|
6
7
|
import signal
|
|
7
8
|
import subprocess
|
|
8
9
|
import sys
|
|
@@ -198,52 +199,53 @@ def route_transcription(audio_buf, groq, local, config):
|
|
|
198
199
|
# Text Cleanup (LLM post-processing)
|
|
199
200
|
# ---------------------------------------------------------------------------
|
|
200
201
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
"
|
|
204
|
-
"
|
|
205
|
-
"
|
|
206
|
-
"
|
|
207
|
-
)
|
|
202
|
+
# Filler words to remove (with word boundaries)
|
|
203
|
+
FILLER_PATTERNS = [
|
|
204
|
+
r"\b(um+|uh+|er+|ah+)\b",
|
|
205
|
+
r"\b(like,?\s+)(?=\w)", # "like" as filler, not "I like pizza"
|
|
206
|
+
r"\b(you know,?\s*)",
|
|
207
|
+
r"\b(basically,?\s*)",
|
|
208
|
+
r"\b(actually,?\s*)(?![\w])", # "actually" as filler
|
|
209
|
+
r"\b(so,?\s+)(?=[a-z])", # "so" at start as filler
|
|
210
|
+
r"\b(i mean,?\s*)",
|
|
211
|
+
r"\b(kind of|kinda)\s+",
|
|
212
|
+
r"\b(sort of|sorta)\s+",
|
|
213
|
+
]
|
|
208
214
|
|
|
209
215
|
|
|
210
216
|
class TextCleaner:
|
|
211
|
-
"""
|
|
217
|
+
"""Clean up transcribed text using simple regex rules."""
|
|
212
218
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
self.api_key = api_key
|
|
217
|
-
self.model = model
|
|
219
|
+
def __init__(self, api_key=None, model=None):
|
|
220
|
+
# API key and model not used - kept for backward compatibility
|
|
221
|
+
pass
|
|
218
222
|
|
|
219
223
|
def clean(self, text):
|
|
220
|
-
if not
|
|
224
|
+
if not text:
|
|
221
225
|
return text
|
|
222
226
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
cleaned
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
except Exception as e:
|
|
244
|
-
log.warning(f"Text cleanup failed ({e}), using raw transcription")
|
|
227
|
+
original = text
|
|
228
|
+
cleaned = text
|
|
229
|
+
|
|
230
|
+
# Remove filler words
|
|
231
|
+
for pattern in FILLER_PATTERNS:
|
|
232
|
+
cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE)
|
|
233
|
+
|
|
234
|
+
# Clean up multiple spaces
|
|
235
|
+
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
|
236
|
+
|
|
237
|
+
# Capitalize first letter
|
|
238
|
+
if cleaned:
|
|
239
|
+
cleaned = cleaned[0].upper() + cleaned[1:]
|
|
240
|
+
|
|
241
|
+
# Add period if no ending punctuation
|
|
242
|
+
if cleaned and cleaned[-1] not in ".!?":
|
|
243
|
+
cleaned += "."
|
|
244
|
+
|
|
245
|
+
if cleaned != original:
|
|
246
|
+
log.info(f'[Cleanup] "{original}" -> "{cleaned}"')
|
|
245
247
|
|
|
246
|
-
return
|
|
248
|
+
return cleaned
|
|
247
249
|
|
|
248
250
|
|
|
249
251
|
# ---------------------------------------------------------------------------
|
|
@@ -2,12 +2,12 @@ speaksy/__init__.py,sha256=eG5kdi4U1tayyWwhBUByiHrdFcZpCJ6bninHJclA4VU,63
|
|
|
2
2
|
speaksy/__main__.py,sha256=Y5c4dtERDNv0x1C-HcP1nauSAbssfyuP7MlyN8l1M28,110
|
|
3
3
|
speaksy/cli.py,sha256=Fd-g5g11RMFyKgpdYGtvQAuYIVY6BcL7nyOwFa5GMHM,10225
|
|
4
4
|
speaksy/config.py,sha256=bGiATdV79E27pu7TQTm2VxDDdAKYdzjQrrIbu5GCz2o,4163
|
|
5
|
-
speaksy/core.py,sha256=
|
|
5
|
+
speaksy/core.py,sha256=NxbGWjKWkTXJdoS9pEL_qLchNttKF4R_HkIW19VdEkY,16695
|
|
6
6
|
speaksy/runner.py,sha256=l5aiczErHVLBc3Ir3XIfqxZUAiq4tN6T8OCVqTWwnEU,643
|
|
7
7
|
speaksy/service.py,sha256=EwQs3yB3Ys-WwvP0UI2K_U5_zJqJCJLkK29_MTTcVTE,5449
|
|
8
8
|
speaksy/setup_wizard.py,sha256=-Glu74R-ZKL097Ji8vOmEDC7salaSk9kueONxWs0t4c,7033
|
|
9
|
-
speaksy-0.1.
|
|
10
|
-
speaksy-0.1.
|
|
11
|
-
speaksy-0.1.
|
|
12
|
-
speaksy-0.1.
|
|
13
|
-
speaksy-0.1.
|
|
9
|
+
speaksy-0.1.1.dist-info/METADATA,sha256=LnGcQYfq7p-p3ojsCxTuiuwJjxOvNui2e83yUfYwKQE,6926
|
|
10
|
+
speaksy-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
11
|
+
speaksy-0.1.1.dist-info/entry_points.txt,sha256=LFch1KPmgD4hHNVXY_DSJ24fPnB8GaVKF4aKUrqDjzs,45
|
|
12
|
+
speaksy-0.1.1.dist-info/licenses/LICENSE,sha256=KgmDIQPh17s8aGNha9ebeUXZHi533ew6VyCLcY7IJE4,1066
|
|
13
|
+
speaksy-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|