stttype 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stttype/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """STT Type - Voice to Text Assistant
2
+
3
+ Hold F2 to record, release to transcribe and type.
4
+ GPU-accelerated using faster-whisper.
5
+ """
6
+
7
+ __version__ = "1.0.0"
8
+ __author__ = "STT Type"
stttype/bell.py ADDED
@@ -0,0 +1,56 @@
1
+ """
2
+ Bell/sound notification module.
3
+ Cross-platform sound using system beep or platform-specific methods.
4
+ """
5
+ import platform
6
+ import sys
7
+
8
# Detect the host OS once; winsound can only be imported on Windows.
SYSTEM = platform.system()

# Stays None on non-Windows hosts and when the import fails.
winsound = None
if SYSTEM == "Windows":
    try:
        import winsound
    except ImportError:
        pass
17
+
18
+
19
def _beep(freq, duration):
    """Emit a short beep on a best-effort basis.

    Args:
        freq: Tone frequency in hertz (only used by ``winsound.Beep``).
        duration: Tone length in milliseconds (only used by ``winsound.Beep``).
    """
    if winsound:
        try:
            winsound.Beep(freq, duration)
        except RuntimeError:
            # winsound.Beep raises RuntimeError when the system cannot beep.
            # Audio feedback is optional, so it must never abort the caller.
            pass
    else:
        # Unix fallback - print bell character. print() flushes on its own
        # and silently does nothing when sys.stdout is None, so no separate
        # sys.stdout.flush() call is needed (it would raise on None).
        print("\a", end="", flush=True)
27
+
28
+
29
def play_start_sound():
    """Signal that recording has started.

    When winsound is available the system "exclamation" sound is played
    first; a rising two-tone beep (880 Hz, then 1100 Hz) always follows.
    """
    if winsound:
        winsound.MessageBeep(winsound.MB_ICONEXCLAMATION)
    for tone_hz, length_ms in ((880, 150), (1100, 200)):
        _beep(tone_hz, length_ms)
35
+
36
+
37
def play_stop_sound():
    """Signal that recording has stopped with a falling two-tone beep."""
    for tone_hz, length_ms in ((880, 150), (660, 300)):
        _beep(tone_hz, length_ms)
41
+
42
+
43
def play_error_sound():
    """Play an error sound.

    Uses the system "hand" (error) sound when winsound is available;
    otherwise writes two terminal bell characters to stdout.
    """
    if winsound:
        winsound.MessageBeep(winsound.MB_ICONHAND)
    else:
        # print() flushes by itself and is a no-op when sys.stdout is None;
        # an explicit sys.stdout.flush() would raise AttributeError there.
        print("\a\a", end="", flush=True)
50
+
51
+
52
def play_success_sound():
    """Play a rising three-tone beep after a successful transcription."""
    for tone_hz, length_ms in ((523, 100), (659, 100), (784, 200)):
        _beep(tone_hz, length_ms)
stttype/cli.py ADDED
@@ -0,0 +1,226 @@
1
+ """
2
+ STT Type CLI - Universal command-line interface
3
+ """
4
+ import os
5
+ import sys
6
+ import argparse
7
+ import subprocess
8
+ import time
9
+
10
# Project paths, all derived from the location of this file.
PACKAGE_DIR = os.path.dirname(os.path.abspath(__file__))
PROJECT_ROOT = os.path.split(PACKAGE_DIR)[0]
SRC_DIR = os.path.join(PROJECT_ROOT, "src")
SCRIPTS_DIR = os.path.join(PROJECT_ROOT, "scripts")

# Per-user Startup folder; the APPDATA part is empty when the variable is unset.
STARTUP_FOLDER = os.path.join(
    os.environ.get("APPDATA", ""),
    *("Microsoft", "Windows", "Start Menu", "Programs", "Startup"),
)
SHORTCUT_PATH = os.path.join(STARTUP_FOLDER, "STT Type.lnk")
TRAY_SCRIPT = os.path.join(SCRIPTS_DIR, "run_silent.vbs")
21
+
22
+
23
def _run_ps(cmd):
    """Execute *cmd* with PowerShell.

    Args:
        cmd: PowerShell command text passed to ``-Command``.

    Returns:
        A ``(stdout, returncode)`` tuple; stdout is stripped of surrounding
        whitespace.

    Raises:
        subprocess.TimeoutExpired: If PowerShell runs longer than 10 seconds.
        OSError: If the ``powershell`` executable cannot be started.
    """
    argv = ["powershell", "-NoProfile", "-Command", cmd]
    completed = subprocess.run(argv, capture_output=True, text=True, timeout=10)
    return completed.stdout.strip(), completed.returncode
30
+
31
+
32
def is_running():
    """Report whether a ``pythonw`` process running ``main.py`` exists.

    Returns:
        True when the PowerShell query prints a matching process, False when
        it prints nothing or the query cannot be run at all.
    """
    query = (
        "Get-Process pythonw -ErrorAction SilentlyContinue | Where-Object {"
        "(Get-WmiObject Win32_Process -Filter \"ProcessId=$($_.Id)\").CommandLine -match 'main\\.py'"
        "} | Select-Object -First 1"
    )
    argv = ["powershell", "-NoProfile", "-Command", query]
    try:
        completed = subprocess.run(argv, capture_output=True, text=True, timeout=5)
        return bool(completed.stdout.strip())
    except Exception:
        # Missing PowerShell, timeouts, etc. all count as "not running".
        return False
45
+
46
+
47
def get_running_pids():
    """Get the process IDs of running STT Type instances.

    Looks at ``pythonw`` and ``python`` processes and keeps those whose
    command line mentions ``main.py``.

    Returns:
        A list of PID strings (digits only, no surrounding whitespace); an
        empty list when nothing matches or the query cannot be run.
    """
    # Shared tail of the per-executable query; only the process name differs.
    per_process = (
        " -ErrorAction SilentlyContinue | ForEach-Object {"
        " $cmd = (Get-WmiObject Win32_Process -Filter \"ProcessId=$($_.Id)\").CommandLine;"
        " if ($cmd -match 'main\\.py') { $procs += $_.Id }"
        "};"
    )
    script = (
        "$procs = @(); "
        + "".join("Get-Process " + name + per_process for name in ("pythonw", "python"))
        + "$procs -join ','"
    )

    try:
        result = subprocess.run(
            ["powershell", "-NoProfile", "-Command", script],
            capture_output=True, text=True,
            # Bound the call like the other PowerShell helpers so a wedged
            # WMI query cannot hang the CLI forever.
            timeout=10,
        )
    except Exception:
        # Best-effort: missing PowerShell or a timeout means "none found".
        return []

    return [
        token.strip()
        for token in result.stdout.split(",")
        if token.strip().isdigit()
    ]
68
+
69
+
70
def _main_script_path():
    """Return the main.py to launch, or None when no copy can be found.

    The source-checkout location (SRC_DIR) is preferred; the copy that sits
    next to this module (PACKAGE_DIR) is used when SRC_DIR has none.
    """
    for directory in (SRC_DIR, PACKAGE_DIR):
        candidate = os.path.join(directory, "main.py")
        if os.path.isfile(candidate):
            return candidate
    return None


def cmd_start(args):
    """Start STT Type in background.

    Args:
        args: Parsed CLI namespace; ``args.model`` and ``args.lang`` are
            forwarded to main.py.

    Returns:
        0 when the process is detected as running after launch, 1 when it was
        already running, could not be located, or failed to start.
    """
    if is_running():
        print("[WARN] STT Type is already running!")
        return 1

    script = _main_script_path()
    if script is None:
        print(f"[ERR] Could not find main.py in {SRC_DIR} or {PACKAGE_DIR}")
        return 1

    print("[INFO] Starting STT Type in background...")
    print(f" Model: {args.model} | Language: {args.lang}")

    try:
        subprocess.Popen(
            ["pythonw", script,
             "--tray", "--model", args.model, "--lang", args.lang],
            cwd=PROJECT_ROOT,
            # CREATE_NO_WINDOW only exists on Windows builds of subprocess.
            creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0),
        )

        # Give the child a moment to come up before probing for it.
        time.sleep(2)

        if is_running():
            print("[OK] STT Type started successfully")
            return 0
        print("[ERR] Failed to start STT Type")
        return 1
    except Exception as e:
        print(f"[ERR] Error starting: {e}")
        return 1
98
+
99
+
100
def cmd_shutdown(args):
    """Stop all STT Type processes.

    Args:
        args: Parsed CLI namespace (unused).

    Returns:
        0 when at least one process was killed or none were running; 1 when
        processes were found but none could be terminated.
    """
    print("[INFO] Shutting down STT Type...")

    pids = get_running_pids()
    killed = 0

    for pid in pids:
        try:
            result = subprocess.run(["taskkill", "/F", "/PID", str(pid)],
                                    capture_output=True, check=False)
        except Exception as e:
            print(f"[ERR] Could not run taskkill for PID {pid}: {e}")
            continue
        # Only count the process when taskkill actually reported success.
        if result.returncode == 0:
            killed += 1

    if killed > 0:
        print(f"[OK] Shutdown complete ({killed} process(es) killed)")
    elif pids:
        print(f"[ERR] Failed to stop {len(pids)} STT Type process(es)")
        return 1
    else:
        print("[WARN] No STT Type processes found")
    return 0
120
+
121
+
122
def cmd_status(args):
    """Print whether STT Type is running, listing each PID found.

    Args:
        args: Parsed CLI namespace (unused).

    Returns:
        Always 0.
    """
    found = get_running_pids()
    if not found:
        print("[STOPPED] STT Type is not running")
        return 0

    print("[OK] STT Type is RUNNING")
    for process_id in found:
        print(f" PID: {process_id}")
    return 0
132
+
133
+
134
def cmd_restart(args):
    """Shut STT Type down, wait one second, then start it again.

    Args:
        args: Parsed CLI namespace, passed to both sub-commands.

    Returns:
        The exit code produced by ``cmd_start``.
    """
    print("[INFO] Restarting STT Type...")
    cmd_shutdown(args)
    time.sleep(1)
    start_code = cmd_start(args)
    return start_code
140
+
141
+
142
def _ps_quote(value):
    """Return *value* as a single-quoted PowerShell string literal."""
    # Inside PowerShell single quotes the only special character is the
    # quote itself, which is escaped by doubling it.
    return "'" + str(value).replace("'", "''") + "'"


def cmd_addtostartup(args):
    """Add STT Type to Windows startup.

    Creates a shortcut to TRAY_SCRIPT at SHORTCUT_PATH through the
    WScript.Shell COM object.

    Args:
        args: Parsed CLI namespace (unused).

    Returns:
        0 when the shortcut exists afterwards; 1 when it already existed or
        could not be created.
    """
    if os.path.exists(SHORTCUT_PATH):
        print("[WARN] STT Type is already in startup!")
        return 1

    print("[INFO] Adding STT Type to Windows startup...")

    # Paths are quoted with _ps_quote so an apostrophe in a directory name
    # (for example a user profile like O'Brien) cannot break the command.
    ps_cmd = (
        "$WshShell = New-Object -ComObject WScript.Shell;"
        f"$Shortcut = $WshShell.CreateShortcut({_ps_quote(SHORTCUT_PATH)});"
        f"$Shortcut.TargetPath = {_ps_quote(TRAY_SCRIPT)};"
        f"$Shortcut.WorkingDirectory = {_ps_quote(PROJECT_ROOT)};"
        "$Shortcut.IconLocation = 'C:\\\\Windows\\\\System32\\\\shell32.dll,22';"
        "$Shortcut.Description = 'STT Type - Voice to Text';"
        "$Shortcut.Save()"
    )

    try:
        _run_ps(ps_cmd)

        if os.path.exists(SHORTCUT_PATH):
            print(f"[OK] Added to startup: {SHORTCUT_PATH}")
            return 0
        print("[ERR] Failed to create startup shortcut")
        return 1
    except Exception as e:
        print(f"[ERR] Error adding to startup: {e}")
        return 1
172
+
173
+
174
def cmd_rmtostartup(args):
    """Delete the startup shortcut at SHORTCUT_PATH.

    Args:
        args: Parsed CLI namespace (unused).

    Returns:
        0 when the shortcut was removed; 1 when it did not exist or could not
        be deleted.
    """
    shortcut_present = os.path.exists(SHORTCUT_PATH)
    if not shortcut_present:
        print("[WARN] STT Type is not in startup!")
        return 1

    print("[INFO] Removing STT Type from Windows startup...")

    exit_code = 1
    try:
        os.remove(SHORTCUT_PATH)
        print("[OK] Removed from startup")
        exit_code = 0
    except Exception as e:
        print(f"[ERR] Error removing from startup: {e}")
    return exit_code
189
+
190
+
191
def main():
    """Parse the command line and run the first requested action.

    Returns:
        The exit code of the selected command, or 0 after printing help when
        no action flag was given.
    """
    parser = argparse.ArgumentParser(
        description="STT Type - Voice to Text Assistant",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # (flag, help text, handler), listed in dispatch priority order.
    actions = (
        ("start", "Start STT Type in background", cmd_start),
        ("shutdown", "Stop all STT Type processes", cmd_shutdown),
        ("status", "Check if STT Type is running", cmd_status),
        ("restart", "Restart STT Type", cmd_restart),
        ("addtostartup", "Add to Windows startup", cmd_addtostartup),
        ("rmtostartup", "Remove from Windows startup", cmd_rmtostartup),
    )
    for flag, help_text, _handler in actions:
        parser.add_argument(f"--{flag}", action="store_true", help=help_text)
    parser.add_argument("--model", default="base", help="Whisper model: tiny/base/small/medium/large-v3")
    parser.add_argument("--lang", default="en", help="Language code: en/zh/auto/etc")

    args = parser.parse_args()

    # When several action flags are given, the first one in `actions` wins.
    selected = next(
        (handler for flag, _help, handler in actions if getattr(args, flag)),
        None,
    )
    if selected is None:
        parser.print_help()
        return 0
    return selected(args)
223
+
224
+
225
# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    sys.exit(main())
stttype/main.py ADDED
@@ -0,0 +1,241 @@
1
+ """
2
+ STT Assistant - Hold F2 to record, release to transcribe and type.
3
+ Runs in background with system tray icon.
4
+ GPU-accelerated using faster-whisper.
5
+ Cross-platform: Windows, Linux, macOS
6
+ """
7
+ import os
8
+ import sys
9
+ import threading
10
+ import time
11
+ import argparse
12
+ import platform
13
+
14
# Add script directory to path so the sibling modules imported below
# (recorder, transcriber, typer, bell, recorder_indicator) resolve by bare name.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
if SCRIPT_DIR not in sys.path:
    sys.path.insert(0, SCRIPT_DIR)

# Project root for other paths
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
TEMP_DIR = os.path.join(PROJECT_ROOT, "temp")
try:
    os.makedirs(TEMP_DIR, exist_ok=True)
except OSError as exc:
    # The parent directory may be read-only (for example a system-wide
    # install); failing to pre-create it must not abort the import.
    print(f"[WARN] Could not create temp directory {TEMP_DIR}: {exc}")
23
+
24
+ from recorder import AudioRecorder
25
+ from transcriber import WhisperTranscriber
26
+ from typer import KeyboardTyper
27
+ from bell import play_start_sound, play_stop_sound, play_error_sound, play_success_sound
28
+ from recorder_indicator import RecordingIndicator
29
+
30
# Global state shared by the hotkey callbacks and the run_* entry points.
is_recording = False  # True between an F2 press and the matching release
recorder = None  # AudioRecorder, assigned by run_console()/run_tray()
transcriber = None  # WhisperTranscriber, assigned by run_console()/run_tray()
typer = None  # KeyboardTyper, assigned by run_console()/run_tray()
audio_file = None  # path returned by recorder.stop() while it is processed
indicator = None  # RecordingIndicator, assigned by run_console()/run_tray()
_listener = None  # pynput keyboard listener created by setup_hotkeys()
38
+
39
+
40
def on_f2_press():
    """Called when F2 is pressed: start a new recording.

    Does nothing when a recording is already in progress. If the recorder
    cannot be started, the recording state is rolled back and the error
    sound is played instead of letting the exception escape into the
    hotkey listener.
    """
    global is_recording, audio_file

    if is_recording:
        return

    is_recording = True
    audio_file = None

    print("[MIC] Recording started...")
    play_start_sound()

    # Show red dot indicator
    if indicator:
        indicator.show()

    if recorder:
        try:
            recorder.start()
        except Exception as e:
            # Roll back so the next F2 press can try again and the red dot
            # does not stay on screen.
            is_recording = False
            if indicator:
                indicator.hide()
            print(f"[ERR] Could not start recording: {e}")
            play_error_sound()
59
+
60
+
61
def _transcribe_and_type(path):
    """Transcribe the recording at *path*, type the text, delete the file."""
    global audio_file

    try:
        # Transcribe
        text = transcriber.transcribe(path)
        print(f"[TEXT] Transcribed: {text}")

        if text:
            play_success_sound()
            # Type the text
            typer.type_text(text)
            print("[TYPE] Text typed!")
        else:
            print("[WARN] No speech detected")
            play_error_sound()

    except Exception as e:
        print(f"[ERR] Error: {e}")
        play_error_sound()

    finally:
        # Cleanup audio file
        if recorder:
            recorder.cleanup(path)
        # Only clear the global if a newer recording has not replaced it.
        if audio_file == path:
            audio_file = None


def on_f2_release():
    """Called when F2 is released: stop recording and transcribe.

    Stopping the recorder happens inline; transcription and typing are
    handed to a daemon thread named "stt-transcribe" so the keyboard
    listener callback returns quickly (pynput warns against long-running
    work inside listener callbacks).
    """
    global is_recording, audio_file

    if not is_recording:
        return

    is_recording = False
    print("[STOP] Recording stopped. Transcribing...")
    play_stop_sound()

    # Hide red dot indicator
    if indicator:
        indicator.hide()

    # Stop recording and get audio file
    if recorder:
        audio_file = recorder.stop()

    if not audio_file or not os.path.exists(audio_file):
        print("[WARN] No audio recorded")
        play_error_sound()
        return

    # Pass the path explicitly: the global may be reset by the next press
    # while this recording is still being processed.
    threading.Thread(
        target=_transcribe_and_type,
        args=(audio_file,),
        name="stt-transcribe",
        daemon=True,
    ).start()
108
+
109
+
110
def setup_hotkeys():
    """Register the global F2 press/release handlers through pynput.

    Stores the started listener in the module-level ``_listener``. Exits the
    process with status 1 when pynput is not installed.
    """
    global _listener

    try:
        from pynput import keyboard as pk
    except ImportError:
        print("ERROR: 'pynput' not installed. Run: pip install pynput")
        sys.exit(1)

    def handle_press(key):
        # Key auto-repeat delivers repeated presses; ignore them while recording.
        if key == pk.Key.f2 and not is_recording:
            on_f2_press()

    def handle_release(key):
        if key == pk.Key.f2 and is_recording:
            on_f2_release()

    listener = pk.Listener(on_press=handle_press, on_release=handle_release)
    _listener = listener
    listener.start()

    print("[OK] Hotkeys registered: Hold F2 to record, release to transcribe")
    print(" Press Ctrl+C or close window to exit")
135
+
136
+
137
def _init_components(model_size, device, compute_type, language):
    """Create the shared recorder, transcriber, typer and indicator objects."""
    global recorder, transcriber, typer, indicator

    recorder = AudioRecorder(sample_rate=16000, channels=1)
    transcriber = WhisperTranscriber(
        model_size=model_size,
        device=device,
        compute_type=compute_type
    )

    # on_f2_release() calls transcriber.transcribe(path) without a language,
    # so make the requested language this instance's default. Explicit
    # positional or keyword languages still take precedence.
    bound_transcribe = transcriber.transcribe

    def transcribe(audio_path, language=language):
        return bound_transcribe(audio_path, language)

    transcriber.transcribe = transcribe

    typer = KeyboardTyper()
    indicator = RecordingIndicator()


def run_console(model_size="base", device="cuda", compute_type="float16", language="en"):
    """Run in console mode until interrupted with Ctrl+C.

    Args:
        model_size: Whisper model name passed to WhisperTranscriber.
        device: Device passed to WhisperTranscriber ("cuda" or "cpu").
        compute_type: Compute type passed to WhisperTranscriber.
        language: Language code used for every transcription.
    """
    # Initialize components
    print("[INIT] Initializing STT Assistant...")
    print(" Loading Whisper model (this may take a moment)...")

    _init_components(model_size, device, compute_type, language)

    # Setup hotkeys
    setup_hotkeys()

    print("\n[READY] Hold F2 to record your voice.")

    # Keep the main thread alive
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print("\n[EXIT] Shutting down...")
        if _listener:
            _listener.stop()


def run_tray(model_size="base", device="cuda", compute_type="float16", language="en"):
    """Run with system tray icon.

    Falls back to console mode, with the same settings, when pystray or
    Pillow cannot be imported.

    Args:
        model_size: Whisper model name passed to WhisperTranscriber.
        device: Device passed to WhisperTranscriber ("cuda" or "cpu").
        compute_type: Compute type passed to WhisperTranscriber.
        language: Language code used for every transcription.
    """
    try:
        import pystray
        from PIL import Image, ImageDraw
    except ImportError:
        print("pystray or PIL not installed. Running in console mode.")
        print("Install with: pip install pystray pillow")
        run_console(model_size, device, compute_type, language)
        return

    def create_image():
        """Create a simple icon image."""
        width = 64
        height = 64
        image = Image.new('RGB', (width, height), color=(0, 120, 212))
        dc = ImageDraw.Draw(image)
        dc.ellipse([8, 8, width-8, height-8], fill=(255, 255, 255))
        dc.ellipse([16, 16, width-16, height-16], fill=(0, 120, 212))
        return image

    def on_show(icon, item):
        print("STT Assistant is running. Hold F2 to record.")

    def on_exit(icon, item):
        icon.stop()
        if _listener:
            _listener.stop()
        # Hard exit: terminates the process without waiting for other threads.
        os._exit(0)

    # Initialize components
    print("[INIT] Initializing STT Assistant...")
    _init_components(model_size, device, compute_type, language)

    setup_hotkeys()

    # Create tray icon
    menu = pystray.Menu(
        pystray.MenuItem("Show Status", on_show),
        pystray.MenuItem("Exit", on_exit)
    )
    icon = pystray.Icon("stt-assistant", create_image(), "STT Assistant (F2 to record)", menu)

    print("[READY] Hold F2 to record your voice.")
    icon.run()


def main():
    """Parse the command line and start tray or console mode.

    The --model, --device, --compute and --lang options are forwarded to the
    selected run function.
    """
    parser = argparse.ArgumentParser(description="STT Assistant - Voice to Text")
    parser.add_argument("--tray", action="store_true", help="Run in system tray mode")
    parser.add_argument("--model", default="base", help="Whisper model size (tiny/base/small/medium/large-v3)")
    parser.add_argument("--device", default="cuda", help="Device (cuda/cpu)")
    parser.add_argument("--compute", default="float16", help="Compute type (float16/int8)")
    parser.add_argument("--lang", default="en", help="Language code (en/zh/auto)")
    args = parser.parse_args()

    run = run_tray if args.tray else run_console
    run(
        model_size=args.model,
        device=args.device,
        compute_type=args.compute,
        language=args.lang,
    )
238
+
239
+
240
# Script entry point when this file is executed directly.
if __name__ == "__main__":
    main()
stttype/recorder.py ADDED
@@ -0,0 +1,68 @@
1
+ """
2
+ Audio recorder module - records audio while F2 is held.
3
+ Uses sounddevice for cross-platform audio capture.
4
+ """
5
+ import sounddevice as sd
6
+ import numpy as np
7
+ import wave
8
+ import tempfile
9
+ import os
10
+
11
+
12
class AudioRecorder:
    """Capture microphone audio into memory and save it as a 16-bit WAV file."""

    def __init__(self, sample_rate=16000, channels=1):
        """
        Args:
            sample_rate: Sampling rate in hertz used for capture and the WAV header.
            channels: Number of input channels.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.frames = []  # captured chunks, appended by _callback
        self.is_recording = False
        self.stream = None  # open sounddevice InputStream while recording

    def _callback(self, indata, frames, time_info, status):
        """Stream callback: keep a copy of each incoming chunk while recording."""
        if self.is_recording:
            # Copy because the chunk is stored beyond the lifetime of this call.
            self.frames.append(indata.copy())

    def start(self):
        """Start recording audio.

        Does nothing when a recording is already in progress.

        Raises:
            Exception: Whatever sounddevice raises when the input stream
                cannot be opened or started; the recorder is left idle.
        """
        if self.is_recording:
            return
        self.frames = []
        self.is_recording = True
        stream = None
        try:
            stream = sd.InputStream(
                samplerate=self.sample_rate,
                channels=self.channels,
                dtype=np.int16,
                callback=self._callback
            )
            stream.start()
        except Exception:
            # Reset the state so a later start() is not silently ignored.
            self.is_recording = False
            if stream is not None:
                stream.close()
            raise
        self.stream = stream

    def stop(self):
        """Stop recording and return path to saved WAV file.

        Returns:
            Path of a newly created temporary ``.wav`` file, or None when no
            recording was active or no audio was captured.
        """
        if not self.is_recording:
            return None

        self.is_recording = False
        if self.stream:
            self.stream.stop()
            self.stream.close()
            self.stream = None

        if not self.frames:
            return None

        # Concatenate all frames and release the per-chunk buffers.
        audio_data = np.concatenate(self.frames, axis=0)
        self.frames = []

        # mkstemp creates the file atomically, unlike the deprecated mktemp,
        # which only returns a name that another process could claim first.
        fd, temp_file = tempfile.mkstemp(suffix=".wav")
        try:
            with os.fdopen(fd, "wb") as raw, wave.open(raw, "wb") as wf:
                wf.setnchannels(self.channels)
                wf.setsampwidth(2)  # 16-bit
                wf.setframerate(self.sample_rate)
                wf.writeframes(audio_data.tobytes())
        except Exception:
            # Do not leave a partial file behind.
            self.cleanup(temp_file)
            raise

        return temp_file

    def cleanup(self, filepath):
        """Remove temporary audio file; a missing file or empty path is ignored."""
        if filepath and os.path.exists(filepath):
            os.remove(filepath)
@@ -0,0 +1,102 @@
1
+ """
2
+ Recording indicator - shows a red recording dot in the top-right corner.
3
+ """
4
+ import threading
5
+
6
+
7
class RecordingIndicator:
    """A floating red dot indicator with F2 label.

    Each show() starts a daemon thread that creates its own Tk root and runs
    its main loop. hide() never touches Tk directly: it sets an event that
    the Tk thread polls every 100 ms and reacts to by destroying its window.
    """

    def __init__(self, size=120, offset_x=20, offset_y=20):
        """
        Args:
            size: Width and height of the square window in pixels.
            offset_x: Gap between the window and the right screen edge.
            offset_y: Gap between the window and the top screen edge.
        """
        self.size = size
        self.offset_x = offset_x
        self.offset_y = offset_y
        self._root = None
        self._visible = False
        self._lock = threading.Lock()
        # Event belonging to the window currently shown; hide() sets it.
        self._hide_event = None

    def _create_window(self, hide_event=None):
        """Create the tkinter window.

        Args:
            hide_event: Optional threading.Event; once set, the window
                destroys itself on its next poll. Without it the window
                watches ``self._visible`` instead.

        Returns:
            The Tk root, or None when the window could not be created.
        """
        root = None
        try:
            import tkinter as tk

            root = tk.Tk()
            root.overrideredirect(True)
            root.wm_attributes('-topmost', 1)
            root.lift()
            # NOTE(review): focus_force() may pull keyboard focus away from
            # the window the text will be typed into - confirm on each OS.
            root.focus_force()
            root.wm_attributes('-alpha', 0.5)  # 50% overall transparency

            screen_w = root.winfo_screenwidth()
            x = screen_w - self.size - self.offset_x
            y = self.offset_y
            root.geometry(f'{self.size}x{self.size}+{x}+{y}')

            canvas = tk.Canvas(root, width=self.size, height=self.size,
                               bg='black', highlightthickness=0)
            canvas.pack()

            padding = 3
            # Draw circle
            canvas.create_oval(
                padding, padding, self.size - padding, self.size - padding,
                fill='red', outline='darkred', width=2
            )

            # Smaller F2 text centered
            font_size = max(10, self.size // 6)
            canvas.create_text(
                self.size // 2, self.size // 2,
                text='F2',
                fill='white',
                font=('Segoe UI', font_size, 'bold')
            )

            # Keep on top while shown; destroy from this (Tk-owning) thread
            # once a hide has been requested.
            def keep_on_top():
                if hide_event is not None:
                    hidden = hide_event.is_set()
                else:
                    hidden = not self._visible
                try:
                    if hidden:
                        root.destroy()
                        return
                    root.lift()
                    root.wm_attributes('-topmost', 1)
                    root.after(100, keep_on_top)
                except tk.TclError:
                    # The window is already gone; nothing left to maintain.
                    pass

            root.after(100, keep_on_top)
            return root

        except Exception as e:
            print(f"[WARN] Could not create indicator: {e}")
            if root is not None:
                # Do not leak a half-built window.
                try:
                    root.destroy()
                except Exception:
                    pass
            return None

    def show(self):
        """Show the recording indicator; does nothing when already shown."""
        with self._lock:
            if self._visible:
                return
            self._visible = True
            hide_event = threading.Event()
            self._hide_event = hide_event

        def _show():
            root = None
            try:
                root = self._create_window(hide_event)
                if root:
                    self._root = root
                    root.mainloop()
            except Exception as e:
                print(f"[WARN] Recording indicator stopped: {e}")
            finally:
                # Only clear the reference if it still points at our window.
                if self._root is root:
                    self._root = None

        thread = threading.Thread(target=_show, daemon=True)
        thread.start()

    def hide(self):
        """Hide the recording indicator.

        Safe to call before the window exists: the request is remembered and
        honoured by the window's first poll.
        """
        with self._lock:
            self._visible = False
            hide_event, self._hide_event = self._hide_event, None

        if hide_event is not None:
            hide_event.set()
stttype/transcriber.py ADDED
@@ -0,0 +1,107 @@
1
+ """
2
+ STT Transcriber using faster-whisper with GPU acceleration.
3
+ """
4
+ import os
5
+ import sys
6
+ import warnings
7
+
8
+ # Suppress FP16 warning on consumer GPUs
9
+ warnings.filterwarnings("ignore", message=".*FP16 is not supported.*")
10
+
11
+
12
+ def _ensure_cuda_libs():
13
+ """Add NVIDIA CUDA libraries to PATH for ctranslate2/faster-whisper."""
14
+ try:
15
+ import site
16
+ for site_path in site.getsitepackages():
17
+ cublas_bin = os.path.join(site_path, "nvidia", "cublas", "bin")
18
+ if os.path.exists(cublas_bin) and cublas_bin not in os.environ.get("PATH", ""):
19
+ os.environ["PATH"] = cublas_bin + os.pathsep + os.environ.get("PATH", "")
20
+ break
21
+ except Exception:
22
+ pass
23
+
24
+
25
+ _ensure_cuda_libs()
26
+
27
+
28
class WhisperTranscriber:
    """Speech-to-text wrapper around a faster-whisper ``WhisperModel``."""

    def __init__(self, model_size="base", device="cuda", compute_type="float16"):
        """
        Initialize Whisper transcriber and load the model immediately.

        Args:
            model_size: "tiny", "base", "small", "medium", "large-v3"
            device: "cuda" or "cpu"
            compute_type: "float16" or "int8" (int8 for lower VRAM)

        Raises:
            ImportError: If faster-whisper is not installed.
        """
        self.model_size = model_size
        self.device = device
        self.compute_type = compute_type
        self.model = None
        self._load_model()

    def _load_model(self):
        """Load the faster-whisper model for the current device settings.

        Raises:
            ImportError: If faster-whisper is not installed.
        """
        try:
            from faster_whisper import WhisperModel
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "faster-whisper not installed. Run: pip install faster-whisper"
            ) from err

        print(f"Loading Whisper model '{self.model_size}' on {self.device} ({self.compute_type})...")
        self.model = WhisperModel(
            self.model_size,
            device=self.device,
            compute_type=self.compute_type,
            cpu_threads=4 if self.device == "cpu" else 0
        )
        print("Model loaded successfully!")

    def transcribe(self, audio_path, language="en"):
        """
        Transcribe audio file to text.

        Args:
            audio_path: Path to audio file (WAV, MP3, etc.)
            language: Language code (e.g., "en", "zh", "auto" for auto-detect)

        Returns:
            Transcribed text string; "" when the file does not exist.

        Raises:
            RuntimeError: If no model is loaded.
        """
        if not self.model:
            raise RuntimeError("Model not loaded")

        if not os.path.exists(audio_path):
            return ""

        segments, _info = self.model.transcribe(
            audio_path,
            # None asks the model to detect the language itself.
            language=None if language == "auto" else language,
            task="transcribe",
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=500)
        )

        return " ".join(segment.text.strip() for segment in segments).strip()

    def transcribe_with_fallback(self, audio_path, language="en"):
        """
        Transcribe with fallback to CPU if GPU fails.

        When the transcriber is on "cuda" and transcription raises, the model
        is reloaded on CPU with int8 and the transcription is retried once.
        On any other device the original exception is re-raised.

        Args:
            audio_path: Path to audio file.
            language: Language code, as for transcribe().

        Returns:
            Transcribed text string.
        """
        try:
            return self.transcribe(audio_path, language)
        except Exception as e:
            if self.device != "cuda":
                print(f"Transcription failed on {self.device}: {e}")
                raise

            print(f"GPU transcription failed: {e}")
            print("Falling back to CPU...")
            previous = (self.device, self.compute_type, self.model)
            self.device = "cpu"
            self.compute_type = "int8"
            try:
                self._load_model()
            except Exception:
                # Keep the object consistent if the CPU model cannot load.
                self.device, self.compute_type, self.model = previous
                raise
            return self.transcribe(audio_path, language)
stttype/typer.py ADDED
@@ -0,0 +1,54 @@
1
+ """
2
+ Keyboard typer module - types transcribed text using virtual keyboard input.
3
+ Uses pyautogui for cross-platform typing.
4
+ """
5
+ import pyautogui
6
+ import time
7
+
8
+ # Configure pyautogui for safety and speed
9
+ pyautogui.FAILSAFE = True # Move mouse to corner to abort
10
+ pyautogui.PAUSE = 0.01 # Small delay between keypresses
11
+
12
+
13
class KeyboardTyper:
    """Send text to the focused window as simulated keyboard input."""

    def __init__(self, typing_delay=0.01):
        """
        Args:
            typing_delay: Seconds to wait between simulated key presses.
        """
        self.typing_delay = typing_delay

    def type_text(self, text):
        """
        Type text using virtual keyboard.

        Pure-ASCII text is typed key by key. Text with non-ASCII characters
        (for example Chinese or accented letters) is pasted through the
        clipboard instead, falling back to key-by-key typing when pyperclip
        is unavailable.

        Args:
            text: String to type
        """
        if not text:
            return

        # Small delay to let user release F2 key completely
        time.sleep(0.2)

        if not text.isascii():
            # pyautogui.typewrite() presses keys from its keyboard map and
            # cannot reliably emit characters outside it.
            try:
                self.type_text_instant(text)
                return
            except ImportError:
                pass  # no clipboard support; type what the key map allows

        # Type the text
        pyautogui.typewrite(text, interval=self.typing_delay)

    def type_text_instant(self, text):
        """Type text with clipboard paste (faster for long text).

        The previous clipboard content is restored afterwards.

        Args:
            text: String to paste.

        Raises:
            ImportError: If pyperclip is not installed.
        """
        if not text:
            return

        import sys

        import pyperclip

        # Paste is Cmd+V on macOS and Ctrl+V elsewhere.
        modifier = 'command' if sys.platform == 'darwin' else 'ctrl'

        # Save original clipboard
        original = pyperclip.paste()

        try:
            # Copy text to clipboard and paste
            pyperclip.copy(text)
            time.sleep(0.1)
            pyautogui.keyDown(modifier)
            try:
                pyautogui.keyDown('v')
                pyautogui.keyUp('v')
            finally:
                # Never leave the modifier key logically held down.
                pyautogui.keyUp(modifier)
            time.sleep(0.1)
        finally:
            # Restore original clipboard
            pyperclip.copy(original)
@@ -0,0 +1,220 @@
1
+ Metadata-Version: 2.4
2
+ Name: stttype
3
+ Version: 1.0.0
4
+ Summary: Python based STT module running on GPU
5
+ Author: LucasApps
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/LucasApps/stttype
8
+ Project-URL: Issues, https://github.com/LucasApps/stttype/issues
9
+ Keywords: stt,speech-to-text,whisper,voice,typing,keyboard
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: End Users/Desktop
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Operating System :: Microsoft :: Windows
15
+ Classifier: Operating System :: POSIX :: Linux
16
+ Classifier: Operating System :: MacOS
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
23
+ Classifier: Topic :: Utilities
24
+ Requires-Python: >=3.9
25
+ Description-Content-Type: text/markdown
26
+ Requires-Dist: faster-whisper>=1.0.0
27
+ Requires-Dist: sounddevice>=0.4.6
28
+ Requires-Dist: soundfile>=0.12.1
29
+ Requires-Dist: numpy>=1.24.0
30
+ Requires-Dist: pynput>=1.7.0
31
+ Requires-Dist: pyautogui>=0.9.54
32
+ Requires-Dist: pyperclip>=1.8.2
33
+ Requires-Dist: pystray>=0.19.4
34
+ Requires-Dist: pillow>=10.0.0
35
+ Provides-Extra: gpu
36
+ Requires-Dist: torch>=2.0.0; extra == "gpu"
37
+ Requires-Dist: torchaudio>=2.0.0; extra == "gpu"
38
+
39
+ # STT Type v1.0.0
40
+
41
+ **Python-based speech-to-text (STT) module running on the GPU.**
42
+
43
+ Hold **F2** to record your voice, release to transcribe and type the text automatically at your cursor position.
44
+
45
+ ## Features
46
+
47
+ - **Cross-platform** - Works on Windows, Linux, and macOS
48
+ - **Hold F2 to record** - Audio captures while key is held
49
+ - **Visual indicator** - Semi-transparent red dot with "F2" label appears in top-right corner while recording
50
+ - **Bell sounds** - Audio feedback when recording starts/stops
51
+ - **GPU-accelerated STT** - Uses faster-whisper on your NVIDIA GPU
52
+ - **Auto-typing** - Transcribed text is typed at cursor position
53
+ - **System tray mode** - Runs silently in background
54
+ - **Auto-startup** - Starts automatically on login
55
+
56
+ ## Requirements
57
+
58
+ - Python 3.9+
59
+ - NVIDIA GPU with CUDA support (for GPU mode)
60
+ - Microphone
61
+
62
+ ## Installation
63
+
64
+ ### Prerequisites
65
+
66
+ Install PyTorch with CUDA support:
67
+
68
+ ```bash
69
+ # Windows/Linux
70
+ pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu118
71
+
72
+ # macOS (CPU only, no CUDA)
73
+ pip install torch torchaudio
74
+ ```
75
+
76
+ ### Windows
77
+
78
+ ```powershell
79
+ cd "C:\path\to\stttype"
80
+ .\install.ps1
81
+ ```
82
+
83
+ Then restart PowerShell.
84
+
85
+ ### Linux
86
+
87
+ ```bash
88
+ cd /path/to/stttype
89
+ chmod +x install.sh
90
+ ./install.sh
91
+ ```
92
+
93
+ If `sounddevice` fails, install PortAudio:
94
+ ```bash
95
+ # Debian/Ubuntu
96
+ sudo apt-get install portaudio19-dev
97
+
98
+ # Fedora
99
+ sudo dnf install portaudio-devel
100
+
101
+ # Arch
102
+ sudo pacman -S portaudio
103
+ ```
104
+
105
+ ### macOS
106
+
107
+ ```bash
108
+ cd /path/to/stttype
109
+ chmod +x install.sh
110
+ ./install.sh
111
+ ```
112
+
113
+ If `sounddevice` fails, install PortAudio:
114
+ ```bash
115
+ brew install portaudio
116
+ ```
117
+
118
+ **Note:** On macOS, you need to grant Accessibility permissions for `pynput` to capture global hotkeys. Go to **System Settings > Privacy & Security > Accessibility** and add your terminal application.
119
+
120
+ ## Commands
121
+
122
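+ Once installed, `stttype` works from any terminal. The process-management flags (`--start`, `--shutdown`, `--status`, `--restart`, `--addtostartup`, `--rmtostartup`) shell out to PowerShell and `taskkill`, so they currently work on Windows only.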
123
+
124
+ | Command | Description |
125
+ |---------|-------------|
126
+ | `stttype --start` | Start STT Type in background |
127
+ | `stttype --shutdown` | Stop all STT Type processes |
128
+ | `stttype --status` | Check if STT Type is running |
129
+ | `stttype --restart` | Restart STT Type |
130
+ | `stttype --addtostartup` | Add to startup |
131
+ | `stttype --rmtostartup` | Remove from startup |
132
+ | `stttype --model <size>` | Set Whisper model (tiny/base/small/medium/large-v3) |
133
+ | `stttype --lang <code>` | Set language (en/zh/auto/etc) |
134
+ | `stttype --help` | Show help |
135
+
136
+ ### Examples
137
+
138
+ ```bash
139
+ # Start with default settings
140
+ stttype --start
141
+
142
+ # Start with a larger model for better accuracy
143
+ stttype --start --model small
144
+
145
+ # Start with Chinese language
146
+ stttype --start --lang zh
147
+
148
+ # Start with small model and auto-detect language
149
+ stttype --start --model small --lang auto
150
+ ```
151
+
152
+ ## How It Works
153
+
154
+ 1. **Hold F2** - A semi-transparent red dot with "F2" appears in the top-right corner, microphone starts recording
155
+ 2. **Release F2** - Red dot disappears, recording stops
156
+ 3. **GPU transcribes** - Whisper processes audio on your NVIDIA GPU
157
+ 4. **Text is typed** - Result appears at your cursor position
158
+
159
+ ## Models
160
+
161
+ | Model | Size | VRAM | Speed | Accuracy |
162
+ |-------|------|------|-------|----------|
163
+ | `tiny` | 39 MB | ~1 GB | Fastest | Basic |
164
+ | `base` | 74 MB | ~1 GB | Fast | Good |
165
+ | `small` | 244 MB | ~2 GB | Medium | Better |
166
+ | `medium` | 769 MB | ~5 GB | Slower | Very good |
167
+ | `large-v3` | 1550 MB | ~10 GB | Slowest | Excellent |
168
+
169
+ Default is `base` - a good balance of speed and accuracy.
170
+
171
+ ## Uninstall
172
+
173
+ ### Windows
174
+ ```powershell
175
+ cd "C:\path\to\stttype"
176
+ .\uninstall.ps1
177
+ ```
178
+
179
+ ### Linux/macOS
180
+ ```bash
181
+ cd /path/to/stttype
182
+ chmod +x uninstall.sh
183
+ ./uninstall.sh
184
+ ```
185
+
186
+ ## Troubleshooting
187
+
188
+ | Issue | Solution |
189
+ |-------|----------|
190
+ | `stttype` not found | Restart terminal after installation |
191
+ | "CUDA not available" | Install NVIDIA drivers and CUDA toolkit |
192
+ | No sound on start/stop | Check system volume |
193
+ | Text not typing | Make sure the target window is focused |
194
+ | Model download fails | Check internet connection |
195
+ | Hotkeys don't work (macOS) | Grant Accessibility permissions to your terminal |
196
+ | Hotkeys don't work (Linux) | Make sure you're running under X11 (not Wayland) |
197
+
198
+ ## Publish to PyPI
199
+
200
+ ```bash
201
+ # Install build tools
202
+ pip install build twine
203
+
204
+ # Build
205
+ cd /path/to/stttype
206
+ python -m build
207
+
208
+ # Upload
209
+ python -m twine upload dist/*
210
+ ```
211
+
212
+ When prompted:
213
+ - **Username**: `__token__`
214
+ - **Password**: Your PyPI API token
215
+
216
+ ---
217
+
218
+ **Author**: LucasApps
219
+ **Version**: 1.0.0
220
+ **License**: MIT
@@ -0,0 +1,13 @@
1
+ stttype/__init__.py,sha256=Y-MmjPtmAfii3yr5r3dmES0MMp8au2xGJpLaKeXIZNQ,179
2
+ stttype/bell.py,sha256=z5akiAhETMhwkuoJ1u5ZrkODUeyDX-ZdsOUbABYafoM,1223
3
+ stttype/cli.py,sha256=4W7kRJcrcphBkRQ2vhz_MPKZehwthOzdYWe0jV_Hp9g,7145
4
+ stttype/main.py,sha256=CyCZv4SYLbZpgBcm-0X2Modjk9l4qpbUIBt2Eoe2qXI,6570
5
+ stttype/recorder.py,sha256=6LoGybW3XBplvXfRq_kIa-tPAEvNFUFDemL__Qk-ec4,1948
6
+ stttype/recorder_indicator.py,sha256=cdbeZ5O7Fcur0LMzdAVehSIQ7YfDtnYQWVvZnvkpzZ0,3174
7
+ stttype/transcriber.py,sha256=2XzfEm8l0v1nebFFBImAA0EkseXYd_rAe97GvL8NozY,3449
8
+ stttype/typer.py,sha256=Z8xpiTlLynuepvp86yKLPk910EFHBjfz66TRTW-Uttc,1471
9
+ stttype-1.0.0.dist-info/METADATA,sha256=V014Xdj3J7vd5URsJ-ZeSb3jt3xUy6RHvdvut7_gB6Y,6040
10
+ stttype-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
11
+ stttype-1.0.0.dist-info/entry_points.txt,sha256=N8yigAGrQw5ew4a4N7gFoje1Qr3E4n-ksDzHWVAuQ2I,45
12
+ stttype-1.0.0.dist-info/top_level.txt,sha256=MO-2G94oAqUfkf7WFy6ouxNQ7db4UShMcHqDTWRWv18,8
13
+ stttype-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ stttype = stttype.cli:main
@@ -0,0 +1 @@
1
+ stttype