stttype 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stttype/__init__.py +8 -0
- stttype/bell.py +56 -0
- stttype/cli.py +226 -0
- stttype/main.py +241 -0
- stttype/recorder.py +68 -0
- stttype/recorder_indicator.py +102 -0
- stttype/transcriber.py +107 -0
- stttype/typer.py +54 -0
- stttype-1.0.0.dist-info/METADATA +220 -0
- stttype-1.0.0.dist-info/RECORD +13 -0
- stttype-1.0.0.dist-info/WHEEL +5 -0
- stttype-1.0.0.dist-info/entry_points.txt +2 -0
- stttype-1.0.0.dist-info/top_level.txt +1 -0
stttype/__init__.py
ADDED
stttype/bell.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Bell/sound notification module.
|
|
3
|
+
Cross-platform sound using system beep or platform-specific methods.
|
|
4
|
+
"""
|
|
5
|
+
import platform
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
SYSTEM = platform.system()
|
|
9
|
+
|
|
10
|
+
if SYSTEM == "Windows":
|
|
11
|
+
try:
|
|
12
|
+
import winsound
|
|
13
|
+
except ImportError:
|
|
14
|
+
winsound = None
|
|
15
|
+
else:
|
|
16
|
+
winsound = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _beep(freq, duration):
    """Emit a short beep.

    Args:
        freq: Tone frequency in hertz (passed to ``winsound.Beep``).
        duration: Tone length in milliseconds (passed to ``winsound.Beep``).

    When ``winsound`` is unavailable, or it reports that it cannot beep, the
    terminal bell character is printed instead and both arguments are ignored.
    """
    if winsound:
        try:
            winsound.Beep(freq, duration)
            return
        except RuntimeError:
            # winsound.Beep raises RuntimeError when the system cannot sound
            # the speaker; fall through to the terminal bell instead of
            # letting a missing notification abort the caller.
            pass

    # print() already flushes here and silently does nothing when
    # sys.stdout is None, whereas calling sys.stdout.flush() directly would
    # raise AttributeError in that situation.
    print("\a", end="", flush=True)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def play_start_sound():
    """Signal that recording has begun with a rising two-tone beep.

    When ``winsound`` is available the system "exclamation" sound is
    requested first.
    """
    # NOTE(review): the listing this was taken from lost its indentation; the
    # two tones are assumed to be unconditional, as in the other play_* helpers.
    if winsound:
        winsound.MessageBeep(winsound.MB_ICONEXCLAMATION)
    for tone in ((880, 150), (1100, 200)):
        _beep(*tone)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def play_stop_sound():
    """Signal that recording has ended with a falling two-tone beep."""
    for tone in ((880, 150), (660, 300)):
        _beep(*tone)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def play_error_sound():
    """Play the error notification.

    Uses the system "hand" (error) sound through ``winsound`` when it is
    available, otherwise rings the terminal bell twice.
    """
    if winsound:
        winsound.MessageBeep(winsound.MB_ICONHAND)
    else:
        # flush=True already flushes; a separate sys.stdout.flush() would
        # raise AttributeError when sys.stdout is None, while print() is a
        # silent no-op in that case.
        print("\a\a", end="", flush=True)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def play_success_sound():
    """Play three ascending tones after a successful transcription."""
    for tone in ((523, 100), (659, 100), (784, 200)):
        _beep(*tone)
|
stttype/cli.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""
|
|
2
|
+
STT Type CLI - Universal command-line interface
|
|
3
|
+
"""
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import argparse
|
|
7
|
+
import subprocess
|
|
8
|
+
import time
|
|
9
|
+
|
|
10
|
+
# Project paths
|
|
11
|
+
PACKAGE_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
12
|
+
PROJECT_ROOT = os.path.dirname(PACKAGE_DIR)
|
|
13
|
+
SRC_DIR = os.path.join(PROJECT_ROOT, "src")
|
|
14
|
+
SCRIPTS_DIR = os.path.join(PROJECT_ROOT, "scripts")
|
|
15
|
+
STARTUP_FOLDER = os.path.join(
|
|
16
|
+
os.environ.get("APPDATA", ""),
|
|
17
|
+
"Microsoft", "Windows", "Start Menu", "Programs", "Startup"
|
|
18
|
+
)
|
|
19
|
+
SHORTCUT_PATH = os.path.join(STARTUP_FOLDER, "STT Type.lnk")
|
|
20
|
+
TRAY_SCRIPT = os.path.join(SCRIPTS_DIR, "run_silent.vbs")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _run_ps(cmd):
    """Execute *cmd* with PowerShell.

    Returns:
        A ``(stdout, returncode)`` tuple; stdout has surrounding whitespace
        removed. The call is limited to 10 seconds.
    """
    argv = ["powershell", "-NoProfile", "-Command", cmd]
    completed = subprocess.run(argv, capture_output=True, text=True, timeout=10)
    output = completed.stdout.strip()
    return output, completed.returncode
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def is_running():
    """Report whether a ``pythonw`` process running ``main.py`` exists.

    Returns:
        True when the PowerShell query prints anything; False when it prints
        nothing or the query itself fails (including the 5 second timeout).
    """
    query = (
        "Get-Process pythonw -ErrorAction SilentlyContinue | Where-Object {"
        "(Get-WmiObject Win32_Process -Filter \"ProcessId=$($_.Id)\").CommandLine -match 'main\\.py'"
        "} | Select-Object -First 1"
    )
    argv = ["powershell", "-NoProfile", "-Command", query]
    try:
        probe = subprocess.run(argv, capture_output=True, text=True, timeout=5)
        found = probe.stdout.strip()
        return bool(found)
    except Exception:
        return False
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_running_pids():
    """Return the process IDs of running STT Type instances.

    A process counts as an instance when it is named ``pythonw`` or
    ``python`` and its command line mentions ``main.py``.

    Returns:
        A list of PID strings (digits only, no surrounding whitespace). The
        list is empty when nothing matches, PowerShell cannot be launched, or
        the query exceeds its 10 second limit.
    """
    # One loop over both interpreter names replaces two copies of the same
    # pipeline; pythonw is still queried first.
    query = (
        "$procs = @(); "
        "foreach ($name in 'pythonw', 'python') {"
        "  Get-Process $name -ErrorAction SilentlyContinue | ForEach-Object {"
        "    $cmd = (Get-WmiObject Win32_Process -Filter \"ProcessId=$($_.Id)\").CommandLine;"
        "    if ($cmd -match 'main\\.py') { $procs += $_.Id }"
        "  }"
        "};"
        "$procs -join ','"
    )
    try:
        result = subprocess.run(
            ["powershell", "-NoProfile", "-Command", query],
            capture_output=True,
            text=True,
            timeout=10,  # never let a stuck query hang the CLI
        )
    except (OSError, subprocess.SubprocessError):
        return []

    # Strip each field so callers receive clean PIDs they can pass on as-is.
    fields = (field.strip() for field in result.stdout.split(","))
    return [field for field in fields if field.isdigit()]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def cmd_start(args):
    """Start STT Type in the background.

    Args:
        args: Parsed CLI namespace; ``args.model`` and ``args.lang`` are
            forwarded to the launched script.

    Returns:
        0 when a running instance is detected two seconds after launch;
        1 when one was already running, ``main.py`` cannot be found, or the
        launch failed.
    """
    if is_running():
        print("[WARN] STT Type is already running!")
        return 1

    # A source checkout keeps main.py under src/, while the installed package
    # ships it next to this module. Use whichever actually exists.
    candidates = (
        os.path.join(SRC_DIR, "main.py"),
        os.path.join(PACKAGE_DIR, "main.py"),
    )
    script = next((path for path in candidates if os.path.isfile(path)), None)
    if script is None:
        print(f"[ERR] Error starting: main.py not found in {SRC_DIR} or {PACKAGE_DIR}")
        return 1

    print("[INFO] Starting STT Type in background...")
    print(f" Model: {args.model} | Language: {args.lang}")

    try:
        subprocess.Popen(
            ["pythonw", script,
             "--tray", "--model", args.model, "--lang", args.lang],
            cwd=PROJECT_ROOT,
            # CREATE_NO_WINDOW only exists in Windows builds of subprocess;
            # 0 is the neutral value everywhere else.
            creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0),
        )
    except (OSError, ValueError) as e:
        print(f"[ERR] Error starting: {e}")
        return 1

    # Give the new process a moment to appear before checking for it.
    time.sleep(2)

    if is_running():
        print("[OK] STT Type started successfully")
        return 0

    print("[ERR] Failed to start STT Type")
    return 1
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def cmd_shutdown(args):
    """Stop all STT Type processes with ``taskkill /F``.

    Args:
        args: Parsed CLI namespace (not used).

    Returns:
        0 when every discovered process was terminated or none was running;
        1 when processes were found but none could be terminated.
    """
    print("[INFO] Shutting down STT Type...")

    pids = get_running_pids()
    killed = 0

    for pid in pids:
        pid = pid.strip()
        try:
            result = subprocess.run(["taskkill", "/F", "/PID", pid],
                                    capture_output=True, check=False)
        except OSError as e:
            print(f"[WARN] Could not run taskkill for PID {pid}: {e}")
            continue

        # check=False never raises on failure, so the return code is the only
        # evidence that the process was really terminated.
        if result.returncode == 0:
            killed += 1
        else:
            print(f"[WARN] taskkill failed for PID {pid}")

    if killed > 0:
        print(f"[OK] Shutdown complete ({killed} process(es) killed)")
        return 0
    if not pids:
        print("[WARN] No STT Type processes found")
        return 0

    print("[ERR] STT Type processes were found but could not be stopped")
    return 1
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def cmd_status(args):
    """Print whether STT Type is running, listing each PID found.

    Always returns 0.
    """
    running = get_running_pids()
    if not running:
        print("[STOPPED] STT Type is not running")
        return 0

    print("[OK] STT Type is RUNNING")
    for process_id in running:
        print(f" PID: {process_id}")
    return 0
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def cmd_restart(args):
    """Shut STT Type down, pause one second, then start it again.

    Returns the exit code of the start step.
    """
    print("[INFO] Restarting STT Type...")
    cmd_shutdown(args)
    time.sleep(1)
    exit_code = cmd_start(args)
    return exit_code
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def cmd_addtostartup(args):
    """Add STT Type to Windows startup via a shortcut in the Startup folder.

    Args:
        args: Parsed CLI namespace (not used).

    Returns:
        0 when the shortcut exists afterwards; 1 when it already existed, the
        launcher script is missing, or creating the shortcut failed.
    """
    if os.path.exists(SHORTCUT_PATH):
        print("[WARN] STT Type is already in startup!")
        return 1

    # A shortcut pointing at a missing launcher would "succeed" but never
    # start anything, so refuse up front.
    if not os.path.isfile(TRAY_SCRIPT):
        print(f"[ERR] Error adding to startup: launcher not found: {TRAY_SCRIPT}")
        return 1

    print("[INFO] Adding STT Type to Windows startup...")

    def ps_quote(value):
        """Return *value* as a PowerShell single-quoted string literal."""
        # Inside single quotes PowerShell only treats ' specially; it is
        # escaped by doubling, which keeps paths such as C:\Users\O'Brien intact.
        return "'" + str(value).replace("'", "''") + "'"

    ps_cmd = (
        "$WshShell = New-Object -ComObject WScript.Shell;"
        f"$Shortcut = $WshShell.CreateShortcut({ps_quote(SHORTCUT_PATH)});"
        f"$Shortcut.TargetPath = {ps_quote(TRAY_SCRIPT)};"
        f"$Shortcut.WorkingDirectory = {ps_quote(PROJECT_ROOT)};"
        # Single backslashes: PowerShell does not unescape inside '...'.
        "$Shortcut.IconLocation = 'C:\\Windows\\System32\\shell32.dll,22';"
        "$Shortcut.Description = 'STT Type - Voice to Text';"
        "$Shortcut.Save()"
    )

    try:
        _, returncode = _run_ps(ps_cmd)
    except (OSError, subprocess.SubprocessError) as e:
        print(f"[ERR] Error adding to startup: {e}")
        return 1

    if returncode == 0 and os.path.exists(SHORTCUT_PATH):
        print(f"[OK] Added to startup: {SHORTCUT_PATH}")
        return 0

    print("[ERR] Failed to create startup shortcut")
    return 1
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def cmd_rmtostartup(args):
    """Remove the STT Type shortcut from the Windows Startup folder.

    Returns 0 after deleting the shortcut, 1 when there is no shortcut or
    deleting it fails.
    """
    shortcut_present = os.path.exists(SHORTCUT_PATH)
    if not shortcut_present:
        print("[WARN] STT Type is not in startup!")
        return 1

    print("[INFO] Removing STT Type from Windows startup...")

    exit_code = 1
    try:
        os.remove(SHORTCUT_PATH)
        print("[OK] Removed from startup")
        exit_code = 0
    except Exception as err:
        print(f"[ERR] Error removing from startup: {err}")
    return exit_code
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def main():
    """Parse the command line and run the first requested action.

    Returns the selected command's exit code, or 0 after printing the help
    text when no action flag was given.
    """
    # (flag name, help text, handler) in both display and dispatch order.
    actions = (
        ("start", "Start STT Type in background", cmd_start),
        ("shutdown", "Stop all STT Type processes", cmd_shutdown),
        ("status", "Check if STT Type is running", cmd_status),
        ("restart", "Restart STT Type", cmd_restart),
        ("addtostartup", "Add to Windows startup", cmd_addtostartup),
        ("rmtostartup", "Remove from Windows startup", cmd_rmtostartup),
    )

    parser = argparse.ArgumentParser(
        description="STT Type - Voice to Text Assistant",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    for flag, help_text, _ in actions:
        parser.add_argument(f"--{flag}", action="store_true", help=help_text)
    parser.add_argument("--model", default="base", help="Whisper model: tiny/base/small/medium/large-v3")
    parser.add_argument("--lang", default="en", help="Language code: en/zh/auto/etc")

    args = parser.parse_args()

    handler = next((func for flag, _, func in actions if getattr(args, flag)), None)
    if handler is None:
        parser.print_help()
        return 0
    return handler(args)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
if __name__ == "__main__":
|
|
226
|
+
sys.exit(main())
|
stttype/main.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""
|
|
2
|
+
STT Assistant - Hold F2 to record, release to transcribe and type.
|
|
3
|
+
Runs in background with system tray icon.
|
|
4
|
+
GPU-accelerated using faster-whisper.
|
|
5
|
+
Cross-platform: Windows, Linux, macOS
|
|
6
|
+
"""
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import threading
|
|
10
|
+
import time
|
|
11
|
+
import argparse
|
|
12
|
+
import platform
|
|
13
|
+
|
|
14
|
+
# Add script directory to path
|
|
15
|
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
16
|
+
if SCRIPT_DIR not in sys.path:
|
|
17
|
+
sys.path.insert(0, SCRIPT_DIR)
|
|
18
|
+
|
|
19
|
+
# Project root for other paths
|
|
20
|
+
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
|
|
21
|
+
TEMP_DIR = os.path.join(PROJECT_ROOT, "temp")
|
|
22
|
+
os.makedirs(TEMP_DIR, exist_ok=True)
|
|
23
|
+
|
|
24
|
+
from recorder import AudioRecorder
|
|
25
|
+
from transcriber import WhisperTranscriber
|
|
26
|
+
from typer import KeyboardTyper
|
|
27
|
+
from bell import play_start_sound, play_stop_sound, play_error_sound, play_success_sound
|
|
28
|
+
from recorder_indicator import RecordingIndicator
|
|
29
|
+
|
|
30
|
+
# Global state
|
|
31
|
+
is_recording = False
|
|
32
|
+
recorder = None
|
|
33
|
+
transcriber = None
|
|
34
|
+
typer = None
|
|
35
|
+
audio_file = None
|
|
36
|
+
indicator = None
|
|
37
|
+
_listener = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def on_f2_press():
    """Called when F2 is pressed: begin a new recording.

    Does nothing while a recording is already in progress. If the recorder
    cannot be started, the recording state is rolled back, the indicator is
    hidden and the error sound is played, so the next key press can retry.
    """
    global is_recording, audio_file

    if is_recording:
        return

    is_recording = True
    audio_file = None

    print("[MIC] Recording started...")
    play_start_sound()

    # Show red dot indicator
    if indicator:
        indicator.show()

    if recorder:
        try:
            recorder.start()
        except Exception as e:
            # This function runs inside the keyboard listener's callback; an
            # exception escaping from here would surface in that callback, so
            # report it and restore the idle state instead.
            is_recording = False
            if indicator:
                indicator.hide()
            print(f"[ERR] Error: {e}")
            play_error_sound()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def on_f2_release():
    """Called when F2 is released: stop recording, transcribe and type.

    The recorded audio is saved by the recorder, transcribed, and the text is
    typed at the cursor. Any failure is reported on the console together
    with the error sound; the temporary audio file is removed in every case.
    """
    global is_recording, audio_file

    if not is_recording:
        return

    is_recording = False
    print("[STOP] Recording stopped. Transcribing...")
    play_stop_sound()

    # Hide red dot indicator
    if indicator:
        indicator.hide()

    try:
        # Stopping the recorder writes the audio file and may fail, so it
        # lives inside the same handler as the transcription itself.
        if recorder:
            audio_file = recorder.stop()

        if not audio_file or not os.path.exists(audio_file):
            print("[WARN] No audio recorded")
            play_error_sound()
            return

        text = transcriber.transcribe(audio_file)
        print(f"[TEXT] Transcribed: {text}")

        if text:
            play_success_sound()
            typer.type_text(text)
            print("[TYPE] Text typed!")
        else:
            print("[WARN] No speech detected")
            play_error_sound()

    except Exception as e:
        print(f"[ERR] Error: {e}")
        play_error_sound()

    finally:
        # Cleanup audio file; a failed delete must not escape the callback.
        if recorder and audio_file:
            try:
                recorder.cleanup(audio_file)
            except OSError as e:
                print(f"[WARN] Could not remove {audio_file}: {e}")
        audio_file = None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def setup_hotkeys():
    """Start a global pynput keyboard listener that maps F2 to recording.

    Pressing F2 while idle calls on_f2_press(); releasing it while recording
    calls on_f2_release(). The started listener is stored in the
    module-level ``_listener``. Exits the process with status 1 when pynput
    cannot be imported.
    """
    global _listener

    try:
        from pynput import keyboard as pk
    except ImportError:
        print("ERROR: 'pynput' not installed. Run: pip install pynput")
        sys.exit(1)

    hotkey = pk.Key.f2

    def handle_press(key):
        # Ignore other keys and key-repeat events while already recording.
        if key == hotkey and not is_recording:
            on_f2_press()

    def handle_release(key):
        if key == hotkey and is_recording:
            on_f2_release()

    listener = pk.Listener(on_press=handle_press, on_release=handle_release)
    _listener = listener
    listener.start()

    print("[OK] Hotkeys registered: Hold F2 to record, release to transcribe")
    print(" Press Ctrl+C or close window to exit")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _init_components(model_size="base", device="cuda", compute_type="float16", language="en"):
    """Create the recorder, transcriber, typer and indicator globals."""
    global recorder, transcriber, typer, indicator

    recorder = AudioRecorder(sample_rate=16000, channels=1)
    transcriber = WhisperTranscriber(
        model_size=model_size,
        device=device,
        compute_type=compute_type
    )

    if language != "en":
        # The key-release handler calls transcriber.transcribe(audio_file)
        # without a language, which would always use the method's "en"
        # default. Re-bind the instance's transcribe so the configured
        # language becomes the default for this transcriber.
        base_transcribe = transcriber.transcribe

        def transcribe_in_language(audio_path, language=language):
            return base_transcribe(audio_path, language)

        transcriber.transcribe = transcribe_in_language

    typer = KeyboardTyper()
    indicator = RecordingIndicator()


def run_console(model_size="base", device="cuda", compute_type="float16", language="en"):
    """Run in console mode until interrupted with Ctrl+C.

    Args:
        model_size: Whisper model size passed to the transcriber.
        device: Inference device passed to the transcriber.
        compute_type: Compute type passed to the transcriber.
        language: Language code used for transcription.
    """
    # Initialize components
    print("[INIT] Initializing STT Assistant...")
    print(" Loading Whisper model (this may take a moment)...")
    _init_components(model_size, device, compute_type, language)

    # Setup hotkeys
    setup_hotkeys()

    print("\n[READY] Hold F2 to record your voice.")

    # Keep the main thread alive; the listener runs on its own thread.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print("\n[EXIT] Shutting down...")
        if _listener:
            _listener.stop()


def run_tray(model_size="base", device="cuda", compute_type="float16", language="en"):
    """Run with a system tray icon; falls back to console mode.

    Takes the same arguments as run_console(). When pystray or PIL cannot be
    imported, console mode is started with the same settings instead.
    """
    try:
        import pystray
        from PIL import Image, ImageDraw
    except ImportError:
        print("pystray or PIL not installed. Running in console mode.")
        print("Install with: pip install pystray pillow")
        run_console(model_size, device, compute_type, language)
        return

    def create_image():
        """Create a simple icon image: a white ring on a blue square."""
        width = 64
        height = 64
        image = Image.new('RGB', (width, height), color=(0, 120, 212))
        dc = ImageDraw.Draw(image)
        dc.ellipse([8, 8, width-8, height-8], fill=(255, 255, 255))
        dc.ellipse([16, 16, width-16, height-16], fill=(0, 120, 212))
        return image

    def on_show(icon, item):
        print("STT Assistant is running. Hold F2 to record.")

    def on_exit(icon, item):
        icon.stop()
        if _listener:
            _listener.stop()
        # Hard exit so no remaining thread keeps the process alive.
        os._exit(0)

    # Initialize components
    print("[INIT] Initializing STT Assistant...")
    _init_components(model_size, device, compute_type, language)

    setup_hotkeys()

    # Create tray icon
    menu = pystray.Menu(
        pystray.MenuItem("Show Status", on_show),
        pystray.MenuItem("Exit", on_exit)
    )
    icon = pystray.Icon("stt-assistant", create_image(), "STT Assistant (F2 to record)", menu)

    print("[READY] Hold F2 to record your voice.")
    icon.run()


def main():
    """Parse the command line and start tray or console mode.

    The --model, --device, --compute and --lang options are forwarded to the
    selected mode.
    """
    parser = argparse.ArgumentParser(description="STT Assistant - Voice to Text")
    parser.add_argument("--tray", action="store_true", help="Run in system tray mode")
    parser.add_argument("--model", default="base", help="Whisper model size (tiny/base/small/medium/large-v3)")
    parser.add_argument("--device", default="cuda", help="Device (cuda/cpu)")
    parser.add_argument("--compute", default="float16", help="Compute type (float16/int8)")
    parser.add_argument("--lang", default="en", help="Language code (en/zh/auto)")
    args = parser.parse_args()

    settings = dict(
        model_size=args.model,
        device=args.device,
        compute_type=args.compute,
        language=args.lang,
    )

    if args.tray:
        run_tray(**settings)
    else:
        run_console(**settings)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
if __name__ == "__main__":
|
|
241
|
+
main()
|
stttype/recorder.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Audio recorder module - records audio while F2 is held.
|
|
3
|
+
Uses sounddevice for cross-platform audio capture.
|
|
4
|
+
"""
|
|
5
|
+
import sounddevice as sd
|
|
6
|
+
import numpy as np
|
|
7
|
+
import wave
|
|
8
|
+
import tempfile
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AudioRecorder:
    """Records audio from an input stream and saves it as a 16-bit WAV file."""

    def __init__(self, sample_rate=16000, channels=1):
        """Store the capture settings; no audio device is opened yet.

        Args:
            sample_rate: Sampling rate in hertz for capture and the WAV file.
            channels: Number of channels for capture and the WAV file.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.frames = []  # captured blocks, in arrival order
        self.is_recording = False
        self.stream = None

    def _callback(self, indata, frames, time_info, status):
        """Stream callback: keep a copy of each block while recording."""
        if self.is_recording:
            self.frames.append(indata.copy())

    def start(self):
        """Start recording audio.

        Does nothing when a recording is already running. If the stream
        cannot be opened or started, the exception propagates and the
        recorder stays idle so a later start() can retry.
        """
        if self.is_recording:
            return

        self.frames = []
        stream = sd.InputStream(
            samplerate=self.sample_rate,
            channels=self.channels,
            dtype=np.int16,
            callback=self._callback
        )
        # Set the flag before starting so the first blocks are not dropped.
        self.is_recording = True
        try:
            stream.start()
        except Exception:
            self.is_recording = False
            stream.close()
            raise
        self.stream = stream

    def stop(self):
        """Stop recording and return the path to the saved WAV file.

        Returns:
            Path of a temporary ``.wav`` file, or None when no recording was
            running or no audio was captured. The caller is responsible for
            removing the file (see cleanup()).
        """
        if not self.is_recording:
            return None

        self.is_recording = False
        stream, self.stream = self.stream, None
        if stream:
            try:
                stream.stop()
            finally:
                stream.close()

        if not self.frames:
            return None

        # Concatenate all frames
        audio_data = np.concatenate(self.frames, axis=0)

        # mkstemp creates the file atomically, unlike the deprecated mktemp,
        # which only returns a name that another process could claim first.
        fd, temp_file = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            with wave.open(temp_file, 'wb') as wf:
                wf.setnchannels(self.channels)
                wf.setsampwidth(2)  # 16-bit
                wf.setframerate(self.sample_rate)
                wf.writeframes(audio_data.tobytes())
        except Exception:
            # Do not leave a half-written temp file behind.
            self.cleanup(temp_file)
            raise

        return temp_file

    def cleanup(self, filepath):
        """Remove a temporary audio file; missing files and None are ignored."""
        if not filepath:
            return
        try:
            os.remove(filepath)
        except FileNotFoundError:
            pass
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Recording indicator - shows a red recording dot in the top-right corner.
|
|
3
|
+
"""
|
|
4
|
+
import threading
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RecordingIndicator:
    """A floating red dot indicator with F2 label.

    show() opens the window on a daemon thread. hide() only signals that
    thread; the window closes itself on its next poll, so every tkinter call
    is made from the thread that created the window.
    """

    # How often (ms) the window re-asserts "always on top" and checks
    # whether it has been asked to close.
    _POLL_MS = 50

    def __init__(self, size=120, offset_x=20, offset_y=20):
        """Store geometry settings; no window is created yet.

        Args:
            size: Width and height of the square window in pixels.
            offset_x: Gap between the window and the right screen edge.
            offset_y: Gap between the window and the top screen edge.
        """
        self.size = size
        self.offset_x = offset_x
        self.offset_y = offset_y
        self._root = None
        self._visible = False
        self._lock = threading.Lock()
        self._stop_event = None  # set by hide() to close the current window

    def _create_window(self, stop_event=None):
        """Create the tkinter window.

        Args:
            stop_event: Optional ``threading.Event``; once set, the window
                destroys itself on its next poll. Without it the window
                closes when ``self._visible`` becomes False.

        Returns:
            The Tk root, or None when the window could not be created.
        """
        try:
            import tkinter as tk

            root = tk.Tk()
            root.overrideredirect(True)
            root.wm_attributes('-topmost', 1)
            root.lift()
            root.focus_force()
            root.wm_attributes('-alpha', 0.5)  # 50% overall transparency

            screen_w = root.winfo_screenwidth()
            x = screen_w - self.size - self.offset_x
            y = self.offset_y
            root.geometry(f'{self.size}x{self.size}+{x}+{y}')

            canvas = tk.Canvas(root, width=self.size, height=self.size,
                               bg='black', highlightthickness=0)
            canvas.pack()

            padding = 3
            # Draw circle
            canvas.create_oval(
                padding, padding, self.size - padding, self.size - padding,
                fill='red', outline='darkred', width=2
            )

            # Smaller F2 text centered
            font_size = max(10, self.size // 6)
            canvas.create_text(
                self.size // 2, self.size // 2,
                text='F2',
                fill='white',
                font=('Segoe UI', font_size, 'bold')
            )

            def close_requested():
                if stop_event is not None:
                    return stop_event.is_set()
                return not self._visible

            def poll():
                """Close when asked to, otherwise keep the window on top."""
                try:
                    if close_requested():
                        root.destroy()
                        return
                    root.lift()
                    root.wm_attributes('-topmost', 1)
                    root.after(self._POLL_MS, poll)
                except tk.TclError:
                    # The window is already gone; stop polling.
                    pass

            root.after(self._POLL_MS, poll)
            return root

        except Exception as e:
            print(f"[WARN] Could not create indicator: {e}")
            return None

    def show(self):
        """Show the recording indicator (no-op if already shown)."""
        with self._lock:
            if self._visible:
                return
            self._visible = True
            # Each window gets its own event, so a hide() that arrives
            # before the window exists still closes it once it appears.
            stop_event = threading.Event()
            self._stop_event = stop_event

        def _show():
            root = None
            try:
                root = self._create_window(stop_event)
                if root:
                    self._root = root
                    root.mainloop()
            except Exception as e:
                print(f"[WARN] Recording indicator stopped: {e}")
            finally:
                # Only clear the reference if it still points at our window.
                if self._root is root:
                    self._root = None

        thread = threading.Thread(target=_show, daemon=True)
        thread.start()

    def hide(self):
        """Hide the recording indicator.

        Only signals the window thread; the window is destroyed on that
        thread within one poll interval.
        """
        with self._lock:
            self._visible = False
            stop_event, self._stop_event = self._stop_event, None

        if stop_event is not None:
            stop_event.set()
|
stttype/transcriber.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""
|
|
2
|
+
STT Transcriber using faster-whisper with GPU acceleration.
|
|
3
|
+
"""
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import warnings
|
|
7
|
+
|
|
8
|
+
# Suppress FP16 warning on consumer GPUs
|
|
9
|
+
warnings.filterwarnings("ignore", message=".*FP16 is not supported.*")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _ensure_cuda_libs():
    """Add NVIDIA CUDA libraries to PATH for ctranslate2/faster-whisper.

    Looks for ``nvidia/cublas/bin`` under each site-packages directory and
    prepends the first one that exists and is not yet a PATH entry. Does
    nothing when the site-packages directories cannot be determined.
    """
    try:
        import site
        site_dirs = site.getsitepackages()
    except (ImportError, AttributeError):
        # Best effort only: if the site-packages list is unavailable,
        # leave PATH untouched.
        return

    current = os.environ.get("PATH", "")
    # Compare whole entries (case-normalised) rather than substrings, so a
    # longer directory that merely contains this path is not a false match.
    entries = {os.path.normcase(entry) for entry in current.split(os.pathsep) if entry}

    for site_path in site_dirs:
        cublas_bin = os.path.join(site_path, "nvidia", "cublas", "bin")
        if os.path.exists(cublas_bin) and os.path.normcase(cublas_bin) not in entries:
            # Avoid a trailing separator when PATH was empty.
            os.environ["PATH"] = cublas_bin + os.pathsep + current if current else cublas_bin
            break
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
_ensure_cuda_libs()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class WhisperTranscriber:
    """Speech-to-text wrapper around a faster-whisper ``WhisperModel``."""

    def __init__(self, model_size="base", device="cuda", compute_type="float16"):
        """
        Initialize Whisper transcriber and load the model immediately.

        Args:
            model_size: "tiny", "base", "small", "medium", "large-v3"
            device: "cuda" or "cpu"
            compute_type: "float16" or "int8" (int8 for lower VRAM)

        Raises:
            ImportError: If faster-whisper is not installed.
        """
        self.model_size = model_size
        self.device = device
        self.compute_type = compute_type
        self.model = None
        self._load_model()

    def _load_model(self):
        """Load the faster-whisper model for the current device settings."""
        try:
            from faster_whisper import WhisperModel
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "faster-whisper not installed. Run: pip install faster-whisper"
            ) from exc

        print(f"Loading Whisper model '{self.model_size}' on {self.device} ({self.compute_type})...")
        self.model = WhisperModel(
            self.model_size,
            device=self.device,
            compute_type=self.compute_type,
            cpu_threads=4 if self.device == "cpu" else 0
        )
        print("Model loaded successfully!")

    def transcribe(self, audio_path, language="en"):
        """
        Transcribe audio file to text.

        Args:
            audio_path: Path to audio file (WAV, MP3, etc.)
            language: Language code (e.g., "en", "zh", "auto" for auto-detect)

        Returns:
            Transcribed text string; empty when the file does not exist.

        Raises:
            RuntimeError: If no model is loaded.
        """
        if not self.model:
            raise RuntimeError("Model not loaded")

        if not os.path.exists(audio_path):
            return ""

        segments, info = self.model.transcribe(
            audio_path,
            # The model is given None, not the string "auto", for detection.
            language=None if language == "auto" else language,
            task="transcribe",
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=500)
        )

        return " ".join(segment.text.strip() for segment in segments).strip()

    def transcribe_with_fallback(self, audio_path, language="en"):
        """
        Transcribe with fallback to CPU if GPU fails.

        When the transcriber runs on "cuda" and transcription raises, the
        model is reloaded on the CPU with int8 and the transcription is
        retried once. On any other device the error is re-raised unchanged.
        If the CPU model cannot be loaded, the previous device settings and
        model are restored before the error propagates.
        """
        try:
            return self.transcribe(audio_path, language)
        except Exception as e:
            if self.device != "cuda":
                raise

            print(f"GPU transcription failed: {e}")
            print("Falling back to CPU...")
            previous = (self.device, self.compute_type, self.model)
            self.device = "cpu"
            self.compute_type = "int8"
            try:
                self._load_model()
            except Exception:
                self.device, self.compute_type, self.model = previous
                raise
            return self.transcribe(audio_path, language)
|
stttype/typer.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Keyboard typer module - types transcribed text using virtual keyboard input.
|
|
3
|
+
Uses pyautogui for cross-platform typing.
|
|
4
|
+
"""
|
|
5
|
+
import pyautogui
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
# Configure pyautogui for safety and speed
|
|
9
|
+
pyautogui.FAILSAFE = True # Move mouse to corner to abort
|
|
10
|
+
pyautogui.PAUSE = 0.01 # Small delay between keypresses
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class KeyboardTyper:
    """Types text into the focused window using pyautogui."""

    def __init__(self, typing_delay=0.01):
        """
        Args:
            typing_delay: Seconds to wait between simulated key presses.
        """
        self.typing_delay = typing_delay

    def type_text(self, text):
        """
        Type text using virtual keyboard.

        Pure-ASCII text is typed key by key. Text with non-ASCII characters
        is pasted through the clipboard instead, with key-by-key typing as
        the last resort when the clipboard cannot be used.

        Args:
            text: String to type
        """
        if not text:
            return

        # Small delay to let user release F2 key completely
        time.sleep(0.2)

        if text.isascii():
            pyautogui.typewrite(text, interval=self.typing_delay)
            return

        # NOTE(review): pyautogui's typewrite() presses keys from its
        # keyboard map, so characters outside it (most non-ASCII text) are
        # expected not to be typed; pasting keeps them intact.
        try:
            self.type_text_instant(text)
        except (ImportError, RuntimeError):
            pyautogui.typewrite(text, interval=self.typing_delay)

    def type_text_instant(self, text):
        """Type text with clipboard paste (faster for long text).

        The previous clipboard content is restored afterwards.
        """
        if not text:
            return

        import sys

        import pyperclip

        # macOS pastes with Command+V; the other platforms use Ctrl+V.
        paste_modifier = 'command' if sys.platform == 'darwin' else 'ctrl'

        # Save original clipboard
        original = pyperclip.paste()

        try:
            # Copy text to clipboard and paste
            pyperclip.copy(text)
            time.sleep(0.1)
            # hotkey() presses the keys in order and releases them in
            # reverse, so the modifier is not pressed and released by hand.
            pyautogui.hotkey(paste_modifier, 'v')
            time.sleep(0.1)
        finally:
            # Restore original clipboard
            pyperclip.copy(original)
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: stttype
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Python based STT module running on GPU
|
|
5
|
+
Author: LucasApps
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/LucasApps/stttype
|
|
8
|
+
Project-URL: Issues, https://github.com/LucasApps/stttype/issues
|
|
9
|
+
Keywords: stt,speech-to-text,whisper,voice,typing,keyboard
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
15
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
16
|
+
Classifier: Operating System :: MacOS
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
23
|
+
Classifier: Topic :: Utilities
|
|
24
|
+
Requires-Python: >=3.9
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
Requires-Dist: faster-whisper>=1.0.0
|
|
27
|
+
Requires-Dist: sounddevice>=0.4.6
|
|
28
|
+
Requires-Dist: soundfile>=0.12.1
|
|
29
|
+
Requires-Dist: numpy>=1.24.0
|
|
30
|
+
Requires-Dist: pynput>=1.7.0
|
|
31
|
+
Requires-Dist: pyautogui>=0.9.54
|
|
32
|
+
Requires-Dist: pyperclip>=1.8.2
|
|
33
|
+
Requires-Dist: pystray>=0.19.4
|
|
34
|
+
Requires-Dist: pillow>=10.0.0
|
|
35
|
+
Provides-Extra: gpu
|
|
36
|
+
Requires-Dist: torch>=2.0.0; extra == "gpu"
|
|
37
|
+
Requires-Dist: torchaudio>=2.0.0; extra == "gpu"
|
|
38
|
+
|
|
39
|
+
# STT Type v1.0.0
|
|
40
|
+
|
|
41
|
+
**Python-based speech-to-text (STT) module running on GPU.**
|
|
42
|
+
|
|
43
|
+
Hold **F2** to record your voice, release to transcribe and type the text automatically at your cursor position.
|
|
44
|
+
|
|
45
|
+
## Features
|
|
46
|
+
|
|
47
|
+
- **Cross-platform** - Works on Windows, Linux, and macOS
|
|
48
|
+
- **Hold F2 to record** - Audio captures while key is held
|
|
49
|
+
- **Visual indicator** - Semi-transparent red dot with "F2" label appears in top-right corner while recording
|
|
50
|
+
- **Bell sounds** - Audio feedback when recording starts/stops
|
|
51
|
+
- **GPU-accelerated STT** - Uses faster-whisper on your NVIDIA GPU
|
|
52
|
+
- **Auto-typing** - Transcribed text is typed at cursor position
|
|
53
|
+
- **System tray mode** - Runs silently in background
|
|
54
|
+
- **Auto-startup** - Can start automatically on login (enable with `stttype --addtostartup`)
|
|
55
|
+
|
|
56
|
+
## Requirements
|
|
57
|
+
|
|
58
|
+
- Python 3.9+
|
|
59
|
+
- NVIDIA GPU with CUDA support (for GPU mode)
|
|
60
|
+
- Microphone
|
|
61
|
+
|
|
62
|
+
## Installation
|
|
63
|
+
|
|
64
|
+
### Prerequisites
|
|
65
|
+
|
|
66
|
+
Install PyTorch with CUDA support:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
# Windows/Linux
|
|
70
|
+
pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu118
|
|
71
|
+
|
|
72
|
+
# macOS (CPU only, no CUDA)
|
|
73
|
+
pip install torch torchaudio
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Windows
|
|
77
|
+
|
|
78
|
+
```powershell
|
|
79
|
+
cd C:\path\to\stttype
|
|
80
|
+
.\install.ps1
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Then restart PowerShell.
|
|
84
|
+
|
|
85
|
+
### Linux
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
cd /path/to/stttype
|
|
89
|
+
chmod +x install.sh
|
|
90
|
+
./install.sh
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
If `sounddevice` fails, install PortAudio:
|
|
94
|
+
```bash
|
|
95
|
+
# Debian/Ubuntu
|
|
96
|
+
sudo apt-get install portaudio19-dev
|
|
97
|
+
|
|
98
|
+
# Fedora
|
|
99
|
+
sudo dnf install portaudio-devel
|
|
100
|
+
|
|
101
|
+
# Arch
|
|
102
|
+
sudo pacman -S portaudio
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### macOS
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
cd /path/to/stttype
|
|
109
|
+
chmod +x install.sh
|
|
110
|
+
./install.sh
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
If `sounddevice` fails, install PortAudio:
|
|
114
|
+
```bash
|
|
115
|
+
brew install portaudio
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**Note:** On macOS, you need to grant Accessibility permissions for `pynput` to capture global hotkeys. Go to **System Settings > Privacy & Security > Accessibility** and add your terminal application.
|
|
119
|
+
|
|
120
|
+
## Commands
|
|
121
|
+
|
|
122
|
+
Once installed, `stttype` works from any terminal.
|
|
123
|
+
|
|
124
|
+
| Command | Description |
|
|
125
|
+
|---------|-------------|
|
|
126
|
+
| `stttype --start` | Start STT Type in background |
|
|
127
|
+
| `stttype --shutdown` | Stop all STT Type processes |
|
|
128
|
+
| `stttype --status` | Check if STT Type is running |
|
|
129
|
+
| `stttype --restart` | Restart STT Type |
|
|
130
|
+
| `stttype --addtostartup` | Add to startup |
|
|
131
|
+
| `stttype --rmtostartup` | Remove from startup |
|
|
132
|
+
| `stttype --model <size>` | Set Whisper model (tiny/base/small/medium/large-v3) |
|
|
133
|
+
| `stttype --lang <code>` | Set language (en/zh/auto/etc) |
|
|
134
|
+
| `stttype --help` | Show help |
|
|
135
|
+
|
|
136
|
+
### Examples
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
# Start with default settings
|
|
140
|
+
stttype --start
|
|
141
|
+
|
|
142
|
+
# Start with a larger model for better accuracy
|
|
143
|
+
stttype --start --model small
|
|
144
|
+
|
|
145
|
+
# Start with Chinese language
|
|
146
|
+
stttype --start --lang zh
|
|
147
|
+
|
|
148
|
+
# Start with small model and auto-detect language
|
|
149
|
+
stttype --start --model small --lang auto
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## How It Works
|
|
153
|
+
|
|
154
|
+
1. **Hold F2** - A semi-transparent red dot with "F2" appears in the top-right corner, microphone starts recording
|
|
155
|
+
2. **Release F2** - Red dot disappears, recording stops
|
|
156
|
+
3. **GPU transcribes** - Whisper processes audio on your NVIDIA GPU
|
|
157
|
+
4. **Text is typed** - Result appears at your cursor position
|
|
158
|
+
|
|
159
|
+
## Models
|
|
160
|
+
|
|
161
|
+
| Model | Size | VRAM | Speed | Accuracy |
|
|
162
|
+
|-------|------|------|-------|----------|
|
|
163
|
+
| `tiny` | 39 MB | ~1 GB | Fastest | Basic |
|
|
164
|
+
| `base` | 74 MB | ~1 GB | Fast | Good |
|
|
165
|
+
| `small` | 244 MB | ~2 GB | Medium | Better |
|
|
166
|
+
| `medium` | 769 MB | ~5 GB | Slower | Great |
|
|
167
|
+
| `large-v3` | 1550 MB | ~10 GB | Slowest | Excellent |
|
|
168
|
+
|
|
169
|
+
Default is `base` - a good balance of speed and accuracy.
|
|
170
|
+
|
|
171
|
+
## Uninstall
|
|
172
|
+
|
|
173
|
+
### Windows
|
|
174
|
+
```powershell
|
|
175
|
+
cd C:\path\to\stttype
|
|
176
|
+
.\uninstall.ps1
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### Linux/macOS
|
|
180
|
+
```bash
|
|
181
|
+
cd /path/to/stttype
|
|
182
|
+
chmod +x uninstall.sh
|
|
183
|
+
./uninstall.sh
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Troubleshooting
|
|
187
|
+
|
|
188
|
+
| Issue | Solution |
|
|
189
|
+
|-------|----------|
|
|
190
|
+
| `stttype` not found | Restart terminal after installation |
|
|
191
|
+
| "CUDA not available" | Install NVIDIA drivers and CUDA toolkit |
|
|
192
|
+
| No sound on start/stop | Check system volume |
|
|
193
|
+
| Text not typing | Make sure the target window is focused |
|
|
194
|
+
| Model download fails | Check internet connection |
|
|
195
|
+
| Hotkeys don't work (macOS) | Grant Accessibility permissions to your terminal |
|
|
196
|
+
| Hotkeys don't work (Linux) | Make sure you're running under X11 (not Wayland) |
|
|
197
|
+
|
|
198
|
+
## Publish to PyPI
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
# Install build tools
|
|
202
|
+
pip install build twine
|
|
203
|
+
|
|
204
|
+
# Build
|
|
205
|
+
cd /path/to/stttype
|
|
206
|
+
python -m build
|
|
207
|
+
|
|
208
|
+
# Upload
|
|
209
|
+
python -m twine upload dist/*
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
When prompted:
|
|
213
|
+
- **Username**: `__token__`
|
|
214
|
+
- **Password**: Your PyPI API token
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
**Author**: LucasApps
|
|
219
|
+
**Version**: 1.0.0
|
|
220
|
+
**License**: MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
stttype/__init__.py,sha256=Y-MmjPtmAfii3yr5r3dmES0MMp8au2xGJpLaKeXIZNQ,179
|
|
2
|
+
stttype/bell.py,sha256=z5akiAhETMhwkuoJ1u5ZrkODUeyDX-ZdsOUbABYafoM,1223
|
|
3
|
+
stttype/cli.py,sha256=4W7kRJcrcphBkRQ2vhz_MPKZehwthOzdYWe0jV_Hp9g,7145
|
|
4
|
+
stttype/main.py,sha256=CyCZv4SYLbZpgBcm-0X2Modjk9l4qpbUIBt2Eoe2qXI,6570
|
|
5
|
+
stttype/recorder.py,sha256=6LoGybW3XBplvXfRq_kIa-tPAEvNFUFDemL__Qk-ec4,1948
|
|
6
|
+
stttype/recorder_indicator.py,sha256=cdbeZ5O7Fcur0LMzdAVehSIQ7YfDtnYQWVvZnvkpzZ0,3174
|
|
7
|
+
stttype/transcriber.py,sha256=2XzfEm8l0v1nebFFBImAA0EkseXYd_rAe97GvL8NozY,3449
|
|
8
|
+
stttype/typer.py,sha256=Z8xpiTlLynuepvp86yKLPk910EFHBjfz66TRTW-Uttc,1471
|
|
9
|
+
stttype-1.0.0.dist-info/METADATA,sha256=V014Xdj3J7vd5URsJ-ZeSb3jt3xUy6RHvdvut7_gB6Y,6040
|
|
10
|
+
stttype-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
11
|
+
stttype-1.0.0.dist-info/entry_points.txt,sha256=N8yigAGrQw5ew4a4N7gFoje1Qr3E4n-ksDzHWVAuQ2I,45
|
|
12
|
+
stttype-1.0.0.dist-info/top_level.txt,sha256=MO-2G94oAqUfkf7WFy6ouxNQ7db4UShMcHqDTWRWv18,8
|
|
13
|
+
stttype-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
stttype
|