sonote 1.1.0b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonote-1.1.0b1/LICENSE +21 -0
- sonote-1.1.0b1/PKG-INFO +224 -0
- sonote-1.1.0b1/README.md +184 -0
- sonote-1.1.0b1/pyproject.toml +65 -0
- sonote-1.1.0b1/setup.cfg +4 -0
- sonote-1.1.0b1/sonote.egg-info/PKG-INFO +224 -0
- sonote-1.1.0b1/sonote.egg-info/SOURCES.txt +55 -0
- sonote-1.1.0b1/sonote.egg-info/dependency_links.txt +1 -0
- sonote-1.1.0b1/sonote.egg-info/entry_points.txt +2 -0
- sonote-1.1.0b1/sonote.egg-info/requires.txt +21 -0
- sonote-1.1.0b1/sonote.egg-info/top_level.txt +1 -0
- sonote-1.1.0b1/src/__init__.py +0 -0
- sonote-1.1.0b1/src/audio_capture.py +391 -0
- sonote-1.1.0b1/src/cli.py +2290 -0
- sonote-1.1.0b1/src/continuous.py +204 -0
- sonote-1.1.0b1/src/db.py +219 -0
- sonote-1.1.0b1/src/diarize.py +770 -0
- sonote-1.1.0b1/src/domain_keywords.py +336 -0
- sonote-1.1.0b1/src/download.py +292 -0
- sonote-1.1.0b1/src/export.py +589 -0
- sonote-1.1.0b1/src/meeting_writer.py +363 -0
- sonote-1.1.0b1/src/merge.py +147 -0
- sonote-1.1.0b1/src/paths.py +73 -0
- sonote-1.1.0b1/src/polish.py +1020 -0
- sonote-1.1.0b1/src/postprocess.py +306 -0
- sonote-1.1.0b1/src/probe.py +349 -0
- sonote-1.1.0b1/src/runtime_env.py +41 -0
- sonote-1.1.0b1/src/server.py +1860 -0
- sonote-1.1.0b1/src/transcribe.py +353 -0
- sonote-1.1.0b1/src/tray.py +180 -0
- sonote-1.1.0b1/src/whisper_worker.py +322 -0
- sonote-1.1.0b1/static/demo.gif +0 -0
- sonote-1.1.0b1/static/speaker_profile.html +859 -0
- sonote-1.1.0b1/static/viewer.html +5221 -0
- sonote-1.1.0b1/tests/test_api.py +494 -0
- sonote-1.1.0b1/tests/test_audio_capture.py +78 -0
- sonote-1.1.0b1/tests/test_cli_dispatch.py +101 -0
- sonote-1.1.0b1/tests/test_cli_json.py +621 -0
- sonote-1.1.0b1/tests/test_domain_keywords.py +32 -0
- sonote-1.1.0b1/tests/test_e2e.py +379 -0
- sonote-1.1.0b1/tests/test_embedding_mock.py +396 -0
- sonote-1.1.0b1/tests/test_export.py +336 -0
- sonote-1.1.0b1/tests/test_meeting_accuracy_helpers.py +140 -0
- sonote-1.1.0b1/tests/test_meeting_startup.py +285 -0
- sonote-1.1.0b1/tests/test_meeting_writer.py +274 -0
- sonote-1.1.0b1/tests/test_polish.py +184 -0
- sonote-1.1.0b1/tests/test_polish_mock.py +587 -0
- sonote-1.1.0b1/tests/test_postprocess.py +190 -0
- sonote-1.1.0b1/tests/test_profiles_api.py +159 -0
- sonote-1.1.0b1/tests/test_search_api.py +442 -0
- sonote-1.1.0b1/tests/test_search_integration.py +449 -0
- sonote-1.1.0b1/tests/test_security_hardening.py +530 -0
- sonote-1.1.0b1/tests/test_server.py +220 -0
- sonote-1.1.0b1/tests/test_server_keywords.py +106 -0
- sonote-1.1.0b1/tests/test_sessions.py +583 -0
- sonote-1.1.0b1/tests/test_speaker_auto_register.py +612 -0
- sonote-1.1.0b1/tests/test_websocket.py +443 -0
sonote-1.1.0b1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 tellang
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
sonote-1.1.0b1/PKG-INFO
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sonote
|
|
3
|
+
Version: 1.1.0b1
|
|
4
|
+
Summary: AI 에이전트를 위한 소리 노트 — 실시간 한국어 음성 전사 CLI
|
|
5
|
+
Author: tellang
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/tellang/sonote
|
|
8
|
+
Project-URL: Repository, https://github.com/tellang/sonote
|
|
9
|
+
Project-URL: Issues, https://github.com/tellang/sonote/issues
|
|
10
|
+
Keywords: sonote,whisper,stt,korean,transcription,faster-whisper,meeting,live-stream,ai-agent
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: faster-whisper>=1.2.0
|
|
22
|
+
Requires-Dist: yt-dlp>=2024.0.0
|
|
23
|
+
Requires-Dist: nvidia-cublas-cu12>=12.0.0
|
|
24
|
+
Requires-Dist: nvidia-cudnn-cu12>=9.0.0
|
|
25
|
+
Requires-Dist: httpx>=0.27.0
|
|
26
|
+
Requires-Dist: sounddevice>=0.4.6
|
|
27
|
+
Requires-Dist: numpy>=1.26.0
|
|
28
|
+
Requires-Dist: fastapi>=0.110.0
|
|
29
|
+
Requires-Dist: uvicorn>=0.27.0
|
|
30
|
+
Requires-Dist: watchdog>=4.0.0
|
|
31
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
32
|
+
Provides-Extra: diarize
|
|
33
|
+
Requires-Dist: pyannote-audio>=3.1; extra == "diarize"
|
|
34
|
+
Requires-Dist: torch>=2.0.0; extra == "diarize"
|
|
35
|
+
Provides-Extra: docx
|
|
36
|
+
Requires-Dist: python-docx>=1.0.0; extra == "docx"
|
|
37
|
+
Provides-Extra: pdf
|
|
38
|
+
Requires-Dist: fpdf2>=2.8; extra == "pdf"
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
<p align="center">
|
|
42
|
+
<picture>
|
|
43
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/tellang/sonote/main/static/logo-dark.svg">
|
|
44
|
+
<source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/tellang/sonote/main/static/logo-light.svg">
|
|
45
|
+
<img alt="sonote" src="https://raw.githubusercontent.com/tellang/sonote/main/static/logo.svg" width="240">
|
|
46
|
+
</picture>
|
|
47
|
+
</p>
|
|
48
|
+
|
|
49
|
+
<h1 align="center">sonote</h1>
|
|
50
|
+
|
|
51
|
+
<p align="center">
|
|
52
|
+
<strong>Beyond Transcription, Toward Meeting Intelligence.</strong><br>
|
|
53
|
+
Real-time Korean STT engine built for AI agents and professional meetings.
|
|
54
|
+
</p>
|
|
55
|
+
|
|
56
|
+
<p align="center">
|
|
57
|
+
<a href="https://github.com/tellang/sonote/actions"><img src="https://img.shields.io/github/actions/workflow/status/tellang/sonote/ci.yml?style=flat-square" alt="CI"></a>
|
|
58
|
+
<a href="https://github.com/tellang/sonote/stargazers"><img src="https://img.shields.io/github/stars/tellang/sonote?style=flat-square&color=black" alt="Stars"></a>
|
|
59
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/license-mit-yellow?style=flat-square" alt="License"></a>
|
|
60
|
+
</p>
|
|
61
|
+
|
|
62
|
+
<p align="center">
|
|
63
|
+
<a href="docs/GUIDE.md">Documentation</a> ·
|
|
64
|
+
<a href="docs/GUIDE.md#getting-started">Getting Started</a> ·
|
|
65
|
+
<a href="docs/SPEAKER_DIARIZATION_RESEARCH.md">Research</a> ·
|
|
66
|
+
<a href="https://github.com/tellang/sonote/issues">Issues</a>
|
|
67
|
+
</p>
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Demo
|
|
72
|
+
|
|
73
|
+
<p align="center">
|
|
74
|
+
<img src="static/demo.gif" alt="sonote real-time transcription demo" width="700">
|
|
75
|
+
</p>
|
|
76
|
+
|
|
77
|
+
## Installation
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
git clone https://github.com/tellang/sonote && cd sonote
|
|
81
|
+
uv sync
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
> [!NOTE]
|
|
85
|
+
> ffmpeg 필수: `choco install ffmpeg` (Windows) / `brew install ffmpeg` (macOS)
|
|
86
|
+
> 화자 분리: `pip install -e ".[diarize]"` + `HF_TOKEN` 환경변수
|
|
87
|
+
|
|
88
|
+
## Quick Start
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# 회의 실시간 전사 (Viewer: http://localhost:8000)
|
|
92
|
+
sonote meeting
|
|
93
|
+
|
|
94
|
+
# YouTube 라이브 올인원 (스캔 → 병렬 다운 → 변환 → 병합)
|
|
95
|
+
sonote auto <VIDEO_URL>
|
|
96
|
+
|
|
97
|
+
# 로컬 오디오 변환
|
|
98
|
+
sonote transcribe audio.wav
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Commands
|
|
102
|
+
|
|
103
|
+
| Command | Description |
|
|
104
|
+
|---------|-------------|
|
|
105
|
+
| `sonote meeting` | 마이크 → 화자 분리 → SSE 자막 + 파일 저장 |
|
|
106
|
+
| `sonote auto <URL>` | YouTube 올인원 (BGM 자동 분류, 병렬 다운로드, 변환, 병합) |
|
|
107
|
+
| `sonote live <URL>` | YouTube 연속 실시간 변환 |
|
|
108
|
+
| `sonote transcribe <FILE>` | 로컬 오디오/영상 변환 |
|
|
109
|
+
| `sonote detect <URL>` | BGM↔음성 경계 탐색 |
|
|
110
|
+
| `sonote download <URL>` | YouTube 오디오 다운로드 |
|
|
111
|
+
|
|
112
|
+
<details>
|
|
113
|
+
<summary>주요 옵션</summary>
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
# 긴 파일 청크 분할
|
|
117
|
+
sonote transcribe long.wav --chunk-minutes 10
|
|
118
|
+
|
|
119
|
+
# SRT 자막 출력
|
|
120
|
+
sonote transcribe audio.wav --fmt srt
|
|
121
|
+
|
|
122
|
+
# 기존 스크립트에 이어붙이기
|
|
123
|
+
sonote auto "URL" --resume transcript.txt
|
|
124
|
+
|
|
125
|
+
# 마이크 장치 선택
|
|
126
|
+
sonote meeting --list-devices
|
|
127
|
+
sonote meeting --device 1
|
|
128
|
+
|
|
129
|
+
# 화자 분리 비활성화
|
|
130
|
+
sonote meeting --no-diarize
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
</details>
|
|
134
|
+
|
|
135
|
+
## Key Features
|
|
136
|
+
|
|
137
|
+
- **Zero-Latency Tracking** — 마이크 입력과 YouTube 라이브를 최소 지연으로 실시간 추적
|
|
138
|
+
- **Intelligent Diarization** — 미등록 화자 임베딩 추적, 5개+ 세그먼트 누적 시 자동 등록 후보 마킹
|
|
139
|
+
- **Professional Viewer** — 단일 HTML 웹 인터페이스, 다크/라이트 모드, 실시간 검색
|
|
140
|
+
- **AI-Driven Refinement** — LLM 연동 회의 요약 및 스크립트 교정 (Codex STT + Gemini 요약)
|
|
141
|
+
- **Real-time API** — SSE + WebSocket 양방향 통신, 자동 재연결, 세션별 검색 API
|
|
142
|
+
|
|
143
|
+
## Performance
|
|
144
|
+
|
|
145
|
+
| Model | RTF (Speed) | CER (Accuracy) | VRAM |
|
|
146
|
+
| :--- | :--- | :--- | :--- |
|
|
147
|
+
| **large-v3-turbo** (default) | **< 0.05** | ~16.4% | ~3.5 GB |
|
|
148
|
+
| large-v3 | ~0.12 | **~11.2%** | ~6.0 GB |
|
|
149
|
+
| small | < 0.02 | ~22.8% | ~1.5 GB |
|
|
150
|
+
|
|
151
|
+
> [!TIP]
|
|
152
|
+
> NVIDIA RTX 4070+ 환경에서 CUDA float16 최적화. RTF 0.05 미만으로 실시간 이상 속도.
|
|
153
|
+
|
|
154
|
+
<details>
|
|
155
|
+
<summary>API & WebSocket</summary>
|
|
156
|
+
|
|
157
|
+
### Search API
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
# 키워드 검색
|
|
161
|
+
curl "http://127.0.0.1:8000/api/sessions/{session_id}/search?query=회의"
|
|
162
|
+
|
|
163
|
+
# 화자 + 시간 범위 필터
|
|
164
|
+
curl "...?query=결정&speaker=김팀장&time_start=300&time_end=1200"
|
|
165
|
+
|
|
166
|
+
# 정규식 검색
|
|
167
|
+
curl "...?query=일정|마감&regex=true"
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### WebSocket
|
|
171
|
+
|
|
172
|
+
`WS /ws/transcribe` — 전사/교정/세션 이벤트 양방향 통신. SSE 폴백, 30초 하트비트, 자동 재연결.
|
|
173
|
+
|
|
174
|
+
### Speaker Auto-Registration
|
|
175
|
+
|
|
176
|
+
| Endpoint | Method | Description |
|
|
177
|
+
|----------|--------|-------------|
|
|
178
|
+
| `/api/speakers/unknown` | GET | 미등록 화자 목록 |
|
|
179
|
+
| `/api/speakers/auto-register` | POST | 미등록 화자 프로필 등록 |
|
|
180
|
+
| `/api/speakers/unknown/{id}` | DELETE | 미등록 화자 무시 |
|
|
181
|
+
|
|
182
|
+
</details>
|
|
183
|
+
|
|
184
|
+
<details>
|
|
185
|
+
<summary>Build & Benchmark</summary>
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
# Windows EXE 패키징
|
|
189
|
+
uv run python scripts/build.py --onefile
|
|
190
|
+
|
|
191
|
+
# 모델 벤치마크
|
|
192
|
+
uv run python scripts/benchmark_models.py --models small large-v3-turbo
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
</details>
|
|
196
|
+
|
|
197
|
+
## Project Structure
|
|
198
|
+
|
|
199
|
+
```text
|
|
200
|
+
src/
|
|
201
|
+
├── cli.py # CLI 진입점
|
|
202
|
+
├── server.py # FastAPI SSE + WebSocket
|
|
203
|
+
├── transcribe.py # Faster-Whisper 추론 코어
|
|
204
|
+
├── diarize.py # 화자 분리 (pyannote-audio)
|
|
205
|
+
├── download.py # YouTube 오디오 다운로드
|
|
206
|
+
├── continuous.py # 연속 실시간 변환
|
|
207
|
+
├── polish.py # LLM 후처리
|
|
208
|
+
└── whisper_worker.py # CUDA 격리 STT 워커
|
|
209
|
+
static/
|
|
210
|
+
└── viewer.html # 자막 뷰어 (단일 HTML)
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## Documentation
|
|
214
|
+
|
|
215
|
+
Full command references, architecture, and troubleshooting: **[User Guide](docs/GUIDE.md)**
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
<div align="right">
|
|
220
|
+
|
|
221
|
+
**Copyright (c) 2025 Tellang**
|
|
222
|
+
Licensed under the [MIT License](LICENSE).
|
|
223
|
+
|
|
224
|
+
</div>
|
sonote-1.1.0b1/README.md
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<picture>
|
|
3
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/tellang/sonote/main/static/logo-dark.svg">
|
|
4
|
+
<source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/tellang/sonote/main/static/logo-light.svg">
|
|
5
|
+
<img alt="sonote" src="https://raw.githubusercontent.com/tellang/sonote/main/static/logo.svg" width="240">
|
|
6
|
+
</picture>
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
<h1 align="center">sonote</h1>
|
|
10
|
+
|
|
11
|
+
<p align="center">
|
|
12
|
+
<strong>Beyond Transcription, Toward Meeting Intelligence.</strong><br>
|
|
13
|
+
Real-time Korean STT engine built for AI agents and professional meetings.
|
|
14
|
+
</p>
|
|
15
|
+
|
|
16
|
+
<p align="center">
|
|
17
|
+
<a href="https://github.com/tellang/sonote/actions"><img src="https://img.shields.io/github/actions/workflow/status/tellang/sonote/ci.yml?style=flat-square" alt="CI"></a>
|
|
18
|
+
<a href="https://github.com/tellang/sonote/stargazers"><img src="https://img.shields.io/github/stars/tellang/sonote?style=flat-square&color=black" alt="Stars"></a>
|
|
19
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/license-mit-yellow?style=flat-square" alt="License"></a>
|
|
20
|
+
</p>
|
|
21
|
+
|
|
22
|
+
<p align="center">
|
|
23
|
+
<a href="docs/GUIDE.md">Documentation</a> ·
|
|
24
|
+
<a href="docs/GUIDE.md#getting-started">Getting Started</a> ·
|
|
25
|
+
<a href="docs/SPEAKER_DIARIZATION_RESEARCH.md">Research</a> ·
|
|
26
|
+
<a href="https://github.com/tellang/sonote/issues">Issues</a>
|
|
27
|
+
</p>
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Demo
|
|
32
|
+
|
|
33
|
+
<p align="center">
|
|
34
|
+
<img src="static/demo.gif" alt="sonote real-time transcription demo" width="700">
|
|
35
|
+
</p>
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
git clone https://github.com/tellang/sonote && cd sonote
|
|
41
|
+
uv sync
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
> [!NOTE]
|
|
45
|
+
> ffmpeg 필수: `choco install ffmpeg` (Windows) / `brew install ffmpeg` (macOS)
|
|
46
|
+
> 화자 분리: `pip install -e ".[diarize]"` + `HF_TOKEN` 환경변수
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# 회의 실시간 전사 (Viewer: http://localhost:8000)
|
|
52
|
+
sonote meeting
|
|
53
|
+
|
|
54
|
+
# YouTube 라이브 올인원 (스캔 → 병렬 다운 → 변환 → 병합)
|
|
55
|
+
sonote auto <VIDEO_URL>
|
|
56
|
+
|
|
57
|
+
# 로컬 오디오 변환
|
|
58
|
+
sonote transcribe audio.wav
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Commands
|
|
62
|
+
|
|
63
|
+
| Command | Description |
|
|
64
|
+
|---------|-------------|
|
|
65
|
+
| `sonote meeting` | 마이크 → 화자 분리 → SSE 자막 + 파일 저장 |
|
|
66
|
+
| `sonote auto <URL>` | YouTube 올인원 (BGM 자동 분류, 병렬 다운로드, 변환, 병합) |
|
|
67
|
+
| `sonote live <URL>` | YouTube 연속 실시간 변환 |
|
|
68
|
+
| `sonote transcribe <FILE>` | 로컬 오디오/영상 변환 |
|
|
69
|
+
| `sonote detect <URL>` | BGM↔음성 경계 탐색 |
|
|
70
|
+
| `sonote download <URL>` | YouTube 오디오 다운로드 |
|
|
71
|
+
|
|
72
|
+
<details>
|
|
73
|
+
<summary>주요 옵션</summary>
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# 긴 파일 청크 분할
|
|
77
|
+
sonote transcribe long.wav --chunk-minutes 10
|
|
78
|
+
|
|
79
|
+
# SRT 자막 출력
|
|
80
|
+
sonote transcribe audio.wav --fmt srt
|
|
81
|
+
|
|
82
|
+
# 기존 스크립트에 이어붙이기
|
|
83
|
+
sonote auto "URL" --resume transcript.txt
|
|
84
|
+
|
|
85
|
+
# 마이크 장치 선택
|
|
86
|
+
sonote meeting --list-devices
|
|
87
|
+
sonote meeting --device 1
|
|
88
|
+
|
|
89
|
+
# 화자 분리 비활성화
|
|
90
|
+
sonote meeting --no-diarize
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
</details>
|
|
94
|
+
|
|
95
|
+
## Key Features
|
|
96
|
+
|
|
97
|
+
- **Zero-Latency Tracking** — 마이크 입력과 YouTube 라이브를 최소 지연으로 실시간 추적
|
|
98
|
+
- **Intelligent Diarization** — 미등록 화자 임베딩 추적, 5개+ 세그먼트 누적 시 자동 등록 후보 마킹
|
|
99
|
+
- **Professional Viewer** — 단일 HTML 웹 인터페이스, 다크/라이트 모드, 실시간 검색
|
|
100
|
+
- **AI-Driven Refinement** — LLM 연동 회의 요약 및 스크립트 교정 (Codex STT + Gemini 요약)
|
|
101
|
+
- **Real-time API** — SSE + WebSocket 양방향 통신, 자동 재연결, 세션별 검색 API
|
|
102
|
+
|
|
103
|
+
## Performance
|
|
104
|
+
|
|
105
|
+
| Model | RTF (Speed) | CER (Accuracy) | VRAM |
|
|
106
|
+
| :--- | :--- | :--- | :--- |
|
|
107
|
+
| **large-v3-turbo** (default) | **< 0.05** | ~16.4% | ~3.5 GB |
|
|
108
|
+
| large-v3 | ~0.12 | **~11.2%** | ~6.0 GB |
|
|
109
|
+
| small | < 0.02 | ~22.8% | ~1.5 GB |
|
|
110
|
+
|
|
111
|
+
> [!TIP]
|
|
112
|
+
> NVIDIA RTX 4070+ 환경에서 CUDA float16 최적화. RTF 0.05 미만으로 실시간 이상 속도.
|
|
113
|
+
|
|
114
|
+
<details>
|
|
115
|
+
<summary>API & WebSocket</summary>
|
|
116
|
+
|
|
117
|
+
### Search API
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
# 키워드 검색
|
|
121
|
+
curl "http://127.0.0.1:8000/api/sessions/{session_id}/search?query=회의"
|
|
122
|
+
|
|
123
|
+
# 화자 + 시간 범위 필터
|
|
124
|
+
curl "...?query=결정&speaker=김팀장&time_start=300&time_end=1200"
|
|
125
|
+
|
|
126
|
+
# 정규식 검색
|
|
127
|
+
curl "...?query=일정|마감&regex=true"
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### WebSocket
|
|
131
|
+
|
|
132
|
+
`WS /ws/transcribe` — 전사/교정/세션 이벤트 양방향 통신. SSE 폴백, 30초 하트비트, 자동 재연결.
|
|
133
|
+
|
|
134
|
+
### Speaker Auto-Registration
|
|
135
|
+
|
|
136
|
+
| Endpoint | Method | Description |
|
|
137
|
+
|----------|--------|-------------|
|
|
138
|
+
| `/api/speakers/unknown` | GET | 미등록 화자 목록 |
|
|
139
|
+
| `/api/speakers/auto-register` | POST | 미등록 화자 프로필 등록 |
|
|
140
|
+
| `/api/speakers/unknown/{id}` | DELETE | 미등록 화자 무시 |
|
|
141
|
+
|
|
142
|
+
</details>
|
|
143
|
+
|
|
144
|
+
<details>
|
|
145
|
+
<summary>Build & Benchmark</summary>
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
# Windows EXE 패키징
|
|
149
|
+
uv run python scripts/build.py --onefile
|
|
150
|
+
|
|
151
|
+
# 모델 벤치마크
|
|
152
|
+
uv run python scripts/benchmark_models.py --models small large-v3-turbo
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
</details>
|
|
156
|
+
|
|
157
|
+
## Project Structure
|
|
158
|
+
|
|
159
|
+
```text
|
|
160
|
+
src/
|
|
161
|
+
├── cli.py # CLI 진입점
|
|
162
|
+
├── server.py # FastAPI SSE + WebSocket
|
|
163
|
+
├── transcribe.py # Faster-Whisper 추론 코어
|
|
164
|
+
├── diarize.py # 화자 분리 (pyannote-audio)
|
|
165
|
+
├── download.py # YouTube 오디오 다운로드
|
|
166
|
+
├── continuous.py # 연속 실시간 변환
|
|
167
|
+
├── polish.py # LLM 후처리
|
|
168
|
+
└── whisper_worker.py # CUDA 격리 STT 워커
|
|
169
|
+
static/
|
|
170
|
+
└── viewer.html # 자막 뷰어 (단일 HTML)
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## Documentation
|
|
174
|
+
|
|
175
|
+
Full command references, architecture, and troubleshooting: **[User Guide](docs/GUIDE.md)**
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
<div align="right">
|
|
180
|
+
|
|
181
|
+
**Copyright (c) 2025 Tellang**
|
|
182
|
+
Licensed under the [MIT License](LICENSE).
|
|
183
|
+
|
|
184
|
+
</div>
|
sonote-1.1.0b1/pyproject.toml
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "sonote"
|
|
3
|
+
version = "1.1.0b1"
|
|
4
|
+
description = "AI 에이전트를 위한 소리 노트 — 실시간 한국어 음성 전사 CLI"
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
authors = [{ name = "tellang" }]
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
keywords = ["sonote", "whisper", "stt", "korean", "transcription", "faster-whisper", "meeting", "live-stream", "ai-agent"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 3 - Alpha",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Operating System :: OS Independent",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3.11",
|
|
17
|
+
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"faster-whisper>=1.2.0",
|
|
21
|
+
"yt-dlp>=2024.0.0",
|
|
22
|
+
"nvidia-cublas-cu12>=12.0.0",
|
|
23
|
+
"nvidia-cudnn-cu12>=9.0.0",
|
|
24
|
+
"httpx>=0.27.0",
|
|
25
|
+
"sounddevice>=0.4.6",
|
|
26
|
+
"numpy>=1.26.0",
|
|
27
|
+
"fastapi>=0.110.0",
|
|
28
|
+
"uvicorn>=0.27.0",
|
|
29
|
+
"watchdog>=4.0.0",
|
|
30
|
+
"python-multipart>=0.0.9",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.optional-dependencies]
|
|
34
|
+
diarize = ["pyannote-audio>=3.1", "torch>=2.0.0"]
|
|
35
|
+
docx = ["python-docx>=1.0.0"]
|
|
36
|
+
pdf = [
|
|
37
|
+
"fpdf2>=2.8",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.scripts]
|
|
41
|
+
sonote = "src.cli:main"
|
|
42
|
+
|
|
43
|
+
[project.urls]
|
|
44
|
+
Homepage = "https://github.com/tellang/sonote"
|
|
45
|
+
Repository = "https://github.com/tellang/sonote"
|
|
46
|
+
Issues = "https://github.com/tellang/sonote/issues"
|
|
47
|
+
|
|
48
|
+
[build-system]
|
|
49
|
+
requires = ["setuptools>=75.0"]
|
|
50
|
+
build-backend = "setuptools.build_meta"
|
|
51
|
+
|
|
52
|
+
[tool.setuptools.packages.find]
|
|
53
|
+
where = ["."]
|
|
54
|
+
include = ["src*"]
|
|
55
|
+
|
|
56
|
+
[tool.setuptools.package-data]
|
|
57
|
+
src = ["../static/*"]
|
|
58
|
+
|
|
59
|
+
[tool.mypy]
|
|
60
|
+
python_version = "3.11"
|
|
61
|
+
ignore_missing_imports = true
|
|
62
|
+
warn_unused_ignores = true
|
|
63
|
+
|
|
64
|
+
[tool.ruff]
|
|
65
|
+
line-length = 100
|
sonote-1.1.0b1/setup.cfg
ADDED