speaksy 0.1.0__tar.gz → 0.1.1__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -13,7 +13,9 @@
13
13
  "Bash(git init:*)",
14
14
  "Bash(git branch:*)",
15
15
  "Bash(git rm:*)",
16
- "Bash(python -m build:*)"
16
+ "Bash(python -m build:*)",
17
+ "Bash(python -m twine upload:*)",
18
+ "Bash(pipx install:*)"
17
19
  ]
18
20
  }
19
21
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: speaksy
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Voice typing for Linux. Talk it. Type it. Ship it.
5
5
  Project-URL: Homepage, https://github.com/oneKn8/speaksy
6
6
  Project-URL: Repository, https://github.com/oneKn8/speaksy
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "speaksy"
7
- version = "0.1.0"
7
+ version = "0.1.1"
8
8
  description = "Voice typing for Linux. Talk it. Type it. Ship it."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -3,6 +3,7 @@
3
3
  import io
4
4
  import logging
5
5
  import os
6
+ import re
6
7
  import signal
7
8
  import subprocess
8
9
  import sys
@@ -198,52 +199,53 @@ def route_transcription(audio_buf, groq, local, config):
198
199
  # Text Cleanup (LLM post-processing)
199
200
  # ---------------------------------------------------------------------------
200
201
 
201
- CLEANUP_PROMPT = (
202
- "Clean up this voice transcription. Fix grammar, punctuation, and "
203
- "capitalization. Remove filler words (um, uh, like, you know, so, "
204
- "basically, actually). Do NOT change the meaning, add new content, "
205
- "or remove meaningful words. If the text is already clean, return it "
206
- "unchanged. Return ONLY the cleaned text, nothing else."
207
- )
202
+ # Filler words to remove (with word boundaries)
203
+ FILLER_PATTERNS = [
204
+ r"\b(um+|uh+|er+|ah+)\b",
205
+ r"\b(like,?\s+)(?=\w)", # "like" as filler, not "I like pizza"
206
+ r"\b(you know,?\s*)",
207
+ r"\b(basically,?\s*)",
208
+ r"\b(actually,?\s*)(?![\w])", # "actually" as filler
209
+ r"\b(so,?\s+)(?=[a-z])", # "so" at start as filler
210
+ r"\b(i mean,?\s*)",
211
+ r"\b(kind of|kinda)\s+",
212
+ r"\b(sort of|sorta)\s+",
213
+ ]
208
214
 
209
215
 
210
216
  class TextCleaner:
211
- """Post-process transcribed text through a fast LLM."""
217
+ """Clean up transcribed text using simple regex rules."""
212
218
 
213
- CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"
214
-
215
- def __init__(self, api_key, model="llama-3.1-8b-instant"):
216
- self.api_key = api_key
217
- self.model = model
219
+ def __init__(self, api_key=None, model=None):
220
+ # API key and model not used - kept for backward compatibility
221
+ pass
218
222
 
219
223
  def clean(self, text):
220
- if not self.api_key or not text:
224
+ if not text:
221
225
  return text
222
226
 
223
- try:
224
- resp = httpx.post(
225
- self.CHAT_URL,
226
- headers={"Authorization": f"Bearer {self.api_key}"},
227
- json={
228
- "model": self.model,
229
- "messages": [
230
- {"role": "system", "content": CLEANUP_PROMPT},
231
- {"role": "user", "content": text},
232
- ],
233
- "temperature": 0,
234
- "max_tokens": len(text) * 2,
235
- },
236
- timeout=10.0,
237
- )
238
- resp.raise_for_status()
239
- cleaned = resp.json()["choices"][0]["message"]["content"].strip()
240
- if cleaned:
241
- log.info(f'[Cleanup] "{text}" -> "{cleaned}"')
242
- return cleaned
243
- except Exception as e:
244
- log.warning(f"Text cleanup failed ({e}), using raw transcription")
227
+ original = text
228
+ cleaned = text
229
+
230
+ # Remove filler words
231
+ for pattern in FILLER_PATTERNS:
232
+ cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE)
233
+
234
+ # Clean up multiple spaces
235
+ cleaned = re.sub(r"\s+", " ", cleaned).strip()
236
+
237
+ # Capitalize first letter
238
+ if cleaned:
239
+ cleaned = cleaned[0].upper() + cleaned[1:]
240
+
241
+ # Add period if no ending punctuation
242
+ if cleaned and cleaned[-1] not in ".!?":
243
+ cleaned += "."
244
+
245
+ if cleaned != original:
246
+ log.info(f'[Cleanup] "{original}" -> "{cleaned}"')
245
247
 
246
- return text
248
+ return cleaned
247
249
 
248
250
 
249
251
  # ---------------------------------------------------------------------------
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes