voice-mode 3.34.3__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. voice_mode/__version__.py +1 -1
  2. voice_mode/cli.py +8 -0
  3. voice_mode/cli_commands/pronounce_commands.py +223 -0
  4. voice_mode/cli_commands/transcribe.py +141 -0
  5. voice_mode/config.py +139 -37
  6. voice_mode/data/default_pronunciation.yaml +268 -0
  7. voice_mode/frontend/.next/BUILD_ID +1 -0
  8. voice_mode/frontend/.next/app-build-manifest.json +28 -0
  9. voice_mode/frontend/.next/app-path-routes-manifest.json +1 -0
  10. voice_mode/frontend/.next/build-manifest.json +32 -0
  11. voice_mode/frontend/.next/export-marker.json +1 -0
  12. voice_mode/frontend/.next/images-manifest.json +1 -0
  13. voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -0
  14. voice_mode/frontend/.next/next-server.js.nft.json +1 -0
  15. voice_mode/frontend/.next/package.json +1 -0
  16. voice_mode/frontend/.next/prerender-manifest.json +1 -0
  17. voice_mode/frontend/.next/react-loadable-manifest.json +1 -0
  18. voice_mode/frontend/.next/required-server-files.json +1 -0
  19. voice_mode/frontend/.next/routes-manifest.json +1 -0
  20. voice_mode/frontend/.next/server/app/_not-found/page.js +1 -0
  21. voice_mode/frontend/.next/server/app/_not-found/page.js.nft.json +1 -0
  22. voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -0
  23. voice_mode/frontend/.next/server/app/_not-found.html +1 -0
  24. voice_mode/frontend/.next/server/app/_not-found.meta +6 -0
  25. voice_mode/frontend/.next/server/app/_not-found.rsc +9 -0
  26. voice_mode/frontend/.next/server/app/api/connection-details/route.js +12 -0
  27. voice_mode/frontend/.next/server/app/api/connection-details/route.js.nft.json +1 -0
  28. voice_mode/frontend/.next/server/app/favicon.ico/route.js +12 -0
  29. voice_mode/frontend/.next/server/app/favicon.ico/route.js.nft.json +1 -0
  30. voice_mode/frontend/.next/server/app/favicon.ico.body +0 -0
  31. voice_mode/frontend/.next/server/app/favicon.ico.meta +1 -0
  32. voice_mode/frontend/.next/server/app/index.html +1 -0
  33. voice_mode/frontend/.next/server/app/index.meta +5 -0
  34. voice_mode/frontend/.next/server/app/index.rsc +7 -0
  35. voice_mode/frontend/.next/server/app/page.js +11 -0
  36. voice_mode/frontend/.next/server/app/page.js.nft.json +1 -0
  37. voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -0
  38. voice_mode/frontend/.next/server/app-paths-manifest.json +6 -0
  39. voice_mode/frontend/.next/server/chunks/463.js +1 -0
  40. voice_mode/frontend/.next/server/chunks/682.js +6 -0
  41. voice_mode/frontend/.next/server/chunks/948.js +2 -0
  42. voice_mode/frontend/.next/server/chunks/994.js +2 -0
  43. voice_mode/frontend/.next/server/chunks/font-manifest.json +1 -0
  44. voice_mode/frontend/.next/server/font-manifest.json +1 -0
  45. voice_mode/frontend/.next/server/functions-config-manifest.json +1 -0
  46. voice_mode/frontend/.next/server/interception-route-rewrite-manifest.js +1 -0
  47. voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -0
  48. voice_mode/frontend/.next/server/middleware-manifest.json +6 -0
  49. voice_mode/frontend/.next/server/middleware-react-loadable-manifest.js +1 -0
  50. voice_mode/frontend/.next/server/next-font-manifest.js +1 -0
  51. voice_mode/frontend/.next/server/next-font-manifest.json +1 -0
  52. voice_mode/frontend/.next/server/pages/404.html +1 -0
  53. voice_mode/frontend/.next/server/pages/500.html +1 -0
  54. voice_mode/frontend/.next/server/pages/_app.js +1 -0
  55. voice_mode/frontend/.next/server/pages/_app.js.nft.json +1 -0
  56. voice_mode/frontend/.next/server/pages/_document.js +1 -0
  57. voice_mode/frontend/.next/server/pages/_document.js.nft.json +1 -0
  58. voice_mode/frontend/.next/server/pages/_error.js +1 -0
  59. voice_mode/frontend/.next/server/pages/_error.js.nft.json +1 -0
  60. voice_mode/frontend/.next/server/pages-manifest.json +1 -0
  61. voice_mode/frontend/.next/server/server-reference-manifest.js +1 -0
  62. voice_mode/frontend/.next/server/server-reference-manifest.json +1 -0
  63. voice_mode/frontend/.next/server/webpack-runtime.js +1 -0
  64. voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -0
  65. voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +28 -0
  66. voice_mode/frontend/.next/standalone/.next/app-path-routes-manifest.json +1 -0
  67. voice_mode/frontend/.next/standalone/.next/build-manifest.json +32 -0
  68. voice_mode/frontend/.next/standalone/.next/package.json +1 -0
  69. voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -0
  70. voice_mode/frontend/.next/standalone/.next/react-loadable-manifest.json +1 -0
  71. voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -0
  72. voice_mode/frontend/.next/standalone/.next/routes-manifest.json +1 -0
  73. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -0
  74. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js.nft.json +1 -0
  75. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -0
  76. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -0
  77. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.meta +6 -0
  78. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +9 -0
  79. voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +12 -0
  80. voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js.nft.json +1 -0
  81. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +12 -0
  82. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js.nft.json +1 -0
  83. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico.body +0 -0
  84. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico.meta +1 -0
  85. voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -0
  86. voice_mode/frontend/.next/standalone/.next/server/app/index.meta +5 -0
  87. voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +7 -0
  88. voice_mode/frontend/.next/standalone/.next/server/app/page.js +11 -0
  89. voice_mode/frontend/.next/standalone/.next/server/app/page.js.nft.json +1 -0
  90. voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -0
  91. voice_mode/frontend/.next/standalone/.next/server/app-paths-manifest.json +6 -0
  92. voice_mode/frontend/.next/standalone/.next/server/chunks/463.js +1 -0
  93. voice_mode/frontend/.next/standalone/.next/server/chunks/682.js +6 -0
  94. voice_mode/frontend/.next/standalone/.next/server/chunks/948.js +2 -0
  95. voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +2 -0
  96. voice_mode/frontend/.next/standalone/.next/server/font-manifest.json +1 -0
  97. voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -0
  98. voice_mode/frontend/.next/standalone/.next/server/middleware-manifest.json +6 -0
  99. voice_mode/frontend/.next/standalone/.next/server/middleware-react-loadable-manifest.js +1 -0
  100. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -0
  101. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -0
  102. voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -0
  103. voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -0
  104. voice_mode/frontend/.next/standalone/.next/server/pages/_app.js +1 -0
  105. voice_mode/frontend/.next/standalone/.next/server/pages/_app.js.nft.json +1 -0
  106. voice_mode/frontend/.next/standalone/.next/server/pages/_document.js +1 -0
  107. voice_mode/frontend/.next/standalone/.next/server/pages/_document.js.nft.json +1 -0
  108. voice_mode/frontend/.next/standalone/.next/server/pages/_error.js +1 -0
  109. voice_mode/frontend/.next/standalone/.next/server/pages/_error.js.nft.json +1 -0
  110. voice_mode/frontend/.next/standalone/.next/server/pages-manifest.json +1 -0
  111. voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.js +1 -0
  112. voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -0
  113. voice_mode/frontend/.next/standalone/.next/server/webpack-runtime.js +1 -0
  114. voice_mode/frontend/.next/standalone/package.json +40 -0
  115. voice_mode/frontend/.next/standalone/server.js +38 -0
  116. voice_mode/frontend/.next/static/chunks/117-40bc79a2b97edb21.js +2 -0
  117. voice_mode/frontend/.next/static/chunks/144d3bae-2d5f122b82426d88.js +1 -0
  118. voice_mode/frontend/.next/static/chunks/471-bd4b96a33883dfa2.js +3 -0
  119. voice_mode/frontend/.next/static/chunks/app/_not-found/page-5011050e402ab9c8.js +1 -0
  120. voice_mode/frontend/.next/static/chunks/app/layout-fcb9b9ba5b72c7fc.js +1 -0
  121. voice_mode/frontend/.next/static/chunks/app/page-7c7ec2ad413ace39.js +1 -0
  122. voice_mode/frontend/.next/static/chunks/fd9d1056-af324d327b243cf1.js +1 -0
  123. voice_mode/frontend/.next/static/chunks/framework-f66176bb897dc684.js +1 -0
  124. voice_mode/frontend/.next/static/chunks/main-3163eca598b76a9f.js +1 -0
  125. voice_mode/frontend/.next/static/chunks/main-app-d02bd38ac01adb8a.js +1 -0
  126. voice_mode/frontend/.next/static/chunks/pages/_app-72b849fbd24ac258.js +1 -0
  127. voice_mode/frontend/.next/static/chunks/pages/_error-7ba65e1336b92748.js +1 -0
  128. voice_mode/frontend/.next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  129. voice_mode/frontend/.next/static/chunks/webpack-0ea9b80f19935b70.js +1 -0
  130. voice_mode/frontend/.next/static/css/a2f49a47752b5010.css +3 -0
  131. voice_mode/frontend/.next/static/media/01099be941da1820-s.woff2 +0 -0
  132. voice_mode/frontend/.next/static/media/39883d31a7792467-s.p.woff2 +0 -0
  133. voice_mode/frontend/.next/static/media/6368404d2e8d66fe-s.woff2 +0 -0
  134. voice_mode/frontend/.next/static/pbDjheefW1LwCua_8mPoZ/_buildManifest.js +1 -0
  135. voice_mode/frontend/.next/static/pbDjheefW1LwCua_8mPoZ/_ssgManifest.js +1 -0
  136. voice_mode/frontend/.next/trace +43 -0
  137. voice_mode/frontend/.next/types/app/api/connection-details/route.ts +343 -0
  138. voice_mode/frontend/.next/types/app/layout.ts +79 -0
  139. voice_mode/frontend/.next/types/app/page.ts +79 -0
  140. voice_mode/frontend/.next/types/package.json +1 -0
  141. voice_mode/frontend/package-lock.json +154 -1
  142. voice_mode/pronounce.py +397 -0
  143. voice_mode/providers.py +7 -8
  144. voice_mode/resources/configuration.py +2 -2
  145. voice_mode/tools/configuration_management.py +106 -5
  146. voice_mode/tools/converse.py +109 -0
  147. voice_mode/tools/pronounce.py +245 -0
  148. voice_mode/tools/transcription/__init__.py +14 -0
  149. voice_mode/tools/transcription/backends.py +287 -0
  150. voice_mode/tools/transcription/core.py +136 -0
  151. voice_mode/tools/transcription/formats.py +144 -0
  152. voice_mode/tools/transcription/types.py +52 -0
  153. {voice_mode-3.34.3.dist-info → voice_mode-4.1.0.dist-info}/METADATA +5 -2
  154. voice_mode-4.1.0.dist-info/RECORD +259 -0
  155. voice_mode/voice_preferences.py +0 -125
  156. voice_mode-3.34.3.dist-info/RECORD +0 -116
  157. {voice_mode-3.34.3.dist-info → voice_mode-4.1.0.dist-info}/WHEEL +0 -0
  158. {voice_mode-3.34.3.dist-info → voice_mode-4.1.0.dist-info}/entry_points.txt +0 -0
@@ -21,6 +21,7 @@
21
21
  "@types/node": "^20.17.13",
22
22
  "@types/react": "^18.3.18",
23
23
  "@types/react-dom": "^18.3.5",
24
+ "autoprefixer": "^10.4.21",
24
25
  "eslint": "^8.57.1",
25
26
  "eslint-config-next": "14.2.29",
26
27
  "eslint-config-prettier": "9.1.0",
@@ -1254,6 +1255,44 @@
1254
1255
  "node": ">= 0.4"
1255
1256
  }
1256
1257
  },
1258
+ "node_modules/autoprefixer": {
1259
+ "version": "10.4.21",
1260
+ "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.21.tgz",
1261
+ "integrity": "sha512-O+A6LWV5LDHSJD3LjHYoNi4VLsj/Whi7k6zG12xTYaU4cQ8oxQGckXNX8cRHK5yOZ/ppVHe0ZBXGzSV9jXdVbQ==",
1262
+ "dev": true,
1263
+ "funding": [
1264
+ {
1265
+ "type": "opencollective",
1266
+ "url": "https://opencollective.com/postcss/"
1267
+ },
1268
+ {
1269
+ "type": "tidelift",
1270
+ "url": "https://tidelift.com/funding/github/npm/autoprefixer"
1271
+ },
1272
+ {
1273
+ "type": "github",
1274
+ "url": "https://github.com/sponsors/ai"
1275
+ }
1276
+ ],
1277
+ "license": "MIT",
1278
+ "dependencies": {
1279
+ "browserslist": "^4.24.4",
1280
+ "caniuse-lite": "^1.0.30001702",
1281
+ "fraction.js": "^4.3.7",
1282
+ "normalize-range": "^0.1.2",
1283
+ "picocolors": "^1.1.1",
1284
+ "postcss-value-parser": "^4.2.0"
1285
+ },
1286
+ "bin": {
1287
+ "autoprefixer": "bin/autoprefixer"
1288
+ },
1289
+ "engines": {
1290
+ "node": "^10 || ^12 || >=14"
1291
+ },
1292
+ "peerDependencies": {
1293
+ "postcss": "^8.1.0"
1294
+ }
1295
+ },
1257
1296
  "node_modules/available-typed-arrays": {
1258
1297
  "version": "1.0.7",
1259
1298
  "dev": true,
@@ -1320,6 +1359,39 @@
1320
1359
  "node": ">=8"
1321
1360
  }
1322
1361
  },
1362
+ "node_modules/browserslist": {
1363
+ "version": "4.25.4",
1364
+ "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.25.4.tgz",
1365
+ "integrity": "sha512-4jYpcjabC606xJ3kw2QwGEZKX0Aw7sgQdZCvIK9dhVSPh76BKo+C+btT1RRofH7B+8iNpEbgGNVWiLki5q93yg==",
1366
+ "dev": true,
1367
+ "funding": [
1368
+ {
1369
+ "type": "opencollective",
1370
+ "url": "https://opencollective.com/browserslist"
1371
+ },
1372
+ {
1373
+ "type": "tidelift",
1374
+ "url": "https://tidelift.com/funding/github/npm/browserslist"
1375
+ },
1376
+ {
1377
+ "type": "github",
1378
+ "url": "https://github.com/sponsors/ai"
1379
+ }
1380
+ ],
1381
+ "license": "MIT",
1382
+ "dependencies": {
1383
+ "caniuse-lite": "^1.0.30001737",
1384
+ "electron-to-chromium": "^1.5.211",
1385
+ "node-releases": "^2.0.19",
1386
+ "update-browserslist-db": "^1.1.3"
1387
+ },
1388
+ "bin": {
1389
+ "browserslist": "cli.js"
1390
+ },
1391
+ "engines": {
1392
+ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
1393
+ }
1394
+ },
1323
1395
  "node_modules/busboy": {
1324
1396
  "version": "1.6.0",
1325
1397
  "dev": true,
@@ -1417,7 +1489,9 @@
1417
1489
  }
1418
1490
  },
1419
1491
  "node_modules/caniuse-lite": {
1420
- "version": "1.0.30001726",
1492
+ "version": "1.0.30001739",
1493
+ "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001739.tgz",
1494
+ "integrity": "sha512-y+j60d6ulelrNSwpPyrHdl+9mJnQzHBr08xm48Qno0nSk4h3Qojh+ziv2qE6rXf4k3tadF4o1J/1tAbVm1NtnA==",
1421
1495
  "dev": true,
1422
1496
  "funding": [
1423
1497
  {
@@ -1699,6 +1773,13 @@
1699
1773
  "dev": true,
1700
1774
  "license": "MIT"
1701
1775
  },
1776
+ "node_modules/electron-to-chromium": {
1777
+ "version": "1.5.211",
1778
+ "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.211.tgz",
1779
+ "integrity": "sha512-IGBvimJkotaLzFnwIVgW9/UD/AOJ2tByUmeOrtqBfACSbAw5b1G0XpvdaieKyc7ULmbwXVx+4e4Be8pOPBrYkw==",
1780
+ "dev": true,
1781
+ "license": "ISC"
1782
+ },
1702
1783
  "node_modules/emoji-regex": {
1703
1784
  "version": "9.2.2",
1704
1785
  "dev": true,
@@ -1865,6 +1946,16 @@
1865
1946
  "url": "https://github.com/sponsors/ljharb"
1866
1947
  }
1867
1948
  },
1949
+ "node_modules/escalade": {
1950
+ "version": "3.2.0",
1951
+ "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
1952
+ "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
1953
+ "dev": true,
1954
+ "license": "MIT",
1955
+ "engines": {
1956
+ "node": ">=6"
1957
+ }
1958
+ },
1868
1959
  "node_modules/escape-string-regexp": {
1869
1960
  "version": "4.0.0",
1870
1961
  "dev": true,
@@ -2462,6 +2553,20 @@
2462
2553
  "url": "https://github.com/sponsors/isaacs"
2463
2554
  }
2464
2555
  },
2556
+ "node_modules/fraction.js": {
2557
+ "version": "4.3.7",
2558
+ "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz",
2559
+ "integrity": "sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==",
2560
+ "dev": true,
2561
+ "license": "MIT",
2562
+ "engines": {
2563
+ "node": "*"
2564
+ },
2565
+ "funding": {
2566
+ "type": "patreon",
2567
+ "url": "https://github.com/sponsors/rawify"
2568
+ }
2569
+ },
2465
2570
  "node_modules/framer-motion": {
2466
2571
  "version": "11.18.2",
2467
2572
  "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.18.2.tgz",
@@ -3693,6 +3798,13 @@
3693
3798
  "node": "^10 || ^12 || >=14"
3694
3799
  }
3695
3800
  },
3801
+ "node_modules/node-releases": {
3802
+ "version": "2.0.19",
3803
+ "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz",
3804
+ "integrity": "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==",
3805
+ "dev": true,
3806
+ "license": "MIT"
3807
+ },
3696
3808
  "node_modules/normalize-path": {
3697
3809
  "version": "3.0.0",
3698
3810
  "dev": true,
@@ -3701,6 +3813,16 @@
3701
3813
  "node": ">=0.10.0"
3702
3814
  }
3703
3815
  },
3816
+ "node_modules/normalize-range": {
3817
+ "version": "0.1.2",
3818
+ "resolved": "https://registry.npmjs.org/normalize-range/-/normalize-range-0.1.2.tgz",
3819
+ "integrity": "sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA==",
3820
+ "dev": true,
3821
+ "license": "MIT",
3822
+ "engines": {
3823
+ "node": ">=0.10.0"
3824
+ }
3825
+ },
3704
3826
  "node_modules/object-assign": {
3705
3827
  "version": "4.1.1",
3706
3828
  "dev": true,
@@ -5218,6 +5340,37 @@
5218
5340
  "@unrs/resolver-binding-win32-x64-msvc": "1.9.2"
5219
5341
  }
5220
5342
  },
5343
+ "node_modules/update-browserslist-db": {
5344
+ "version": "1.1.3",
5345
+ "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.3.tgz",
5346
+ "integrity": "sha512-UxhIZQ+QInVdunkDAaiazvvT/+fXL5Osr0JZlJulepYu6Jd7qJtDZjlur0emRlT71EN3ScPoE7gvsuIKKNavKw==",
5347
+ "dev": true,
5348
+ "funding": [
5349
+ {
5350
+ "type": "opencollective",
5351
+ "url": "https://opencollective.com/browserslist"
5352
+ },
5353
+ {
5354
+ "type": "tidelift",
5355
+ "url": "https://tidelift.com/funding/github/npm/browserslist"
5356
+ },
5357
+ {
5358
+ "type": "github",
5359
+ "url": "https://github.com/sponsors/ai"
5360
+ }
5361
+ ],
5362
+ "license": "MIT",
5363
+ "dependencies": {
5364
+ "escalade": "^3.2.0",
5365
+ "picocolors": "^1.1.1"
5366
+ },
5367
+ "bin": {
5368
+ "update-browserslist-db": "cli.js"
5369
+ },
5370
+ "peerDependencies": {
5371
+ "browserslist": ">= 4.21.0"
5372
+ }
5373
+ },
5221
5374
  "node_modules/uri-js": {
5222
5375
  "version": "4.4.1",
5223
5376
  "dev": true,
@@ -0,0 +1,397 @@
1
+ """
2
+ Pronunciation middleware for TTS and STT text processing.
3
+
4
+ This module provides regex-based text substitutions to improve TTS pronunciation
5
+ and correct STT transcription errors.
6
+ """
7
+
8
+ import logging
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional, Tuple
12
+ import yaml
13
+ from dataclasses import dataclass, field
14
+ import os
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@dataclass
class PronounceRule:
    """One regex substitution used by the pronunciation middleware.

    The pattern is compiled once, right after construction. A rule whose
    pattern fails to compile is kept but never modifies any text.
    """
    name: str
    pattern: str
    replacement: str
    order: int = 100
    enabled: bool = True
    description: str = ""
    private: bool = True  # Default to private for security
    # Compiled form of ``pattern``; stays None when compilation failed.
    _compiled: Optional[re.Pattern] = field(default=None, init=False, repr=False)

    def __post_init__(self):
        """Compile ``pattern``, logging (not raising) on an invalid regex."""
        try:
            compiled = re.compile(self.pattern)
        except re.error as e:
            logger.error(f"Invalid regex pattern in rule '{self.name}': {e}")
            compiled = None
        self._compiled = compiled

    def apply(self, text: str) -> Tuple[str, bool]:
        """Run the substitution over ``text``.

        Args:
            text: Input text.

        Returns:
            ``(new_text, changed)``. ``changed`` is True only when the
            substitution produced text different from the input. Disabled
            rules, rules without a compiled pattern, and rules whose
            substitution raises all return the input unchanged with False.
        """
        if self._compiled is None or not self.enabled:
            return text, False

        try:
            substituted = self._compiled.sub(self.replacement, text)
        except Exception as e:
            logger.error(f"Error applying rule '{self.name}': {e}")
            return text, False
        return substituted, substituted != text
51
+
52
+
53
class PronounceManager:
    """Manages pronunciation rules for TTS and STT corrections.

    Rules live in ``self.rules`` under the keys ``'tts'`` and ``'stt'``; each
    list is kept sorted by ``order``. Rules are read from YAML files, and when
    a rule name is defined more than once the later definition replaces the
    earlier one.
    """

    _DIRECTIONS = ('tts', 'stt')

    def __init__(self, config_paths: Optional[List[Path]] = None):
        """
        Initialize the pronunciation rule manager.

        Args:
            config_paths: List of config file paths. If None (or empty),
                the default locations are used.
        """
        self.rules: Dict[str, List[PronounceRule]] = {
            'tts': [],
            'stt': []
        }
        self.config_paths = config_paths or self._get_default_config_paths()
        self._load_all_rules()

    @staticmethod
    def _user_config_path() -> Path:
        """Return the per-user pronunciation config path."""
        return Path.home() / '.voicemode' / 'config' / 'pronunciation.yaml'

    def _get_default_config_paths(self) -> List[Path]:
        """Get default configuration file paths.

        Candidates, in load order: the packaged defaults, the user config,
        ``.pronunciation.yaml`` in the current working directory, then every
        entry of the colon-separated ``VOICEMODE_PRONUNCIATION_CONFIG``
        environment variable. Only paths that exist are returned.
        """
        candidates = [
            Path(__file__).parent / 'data' / 'default_pronunciation.yaml',
            self._user_config_path(),
            Path.cwd() / '.pronunciation.yaml',
        ]

        env_paths = os.environ.get('VOICEMODE_PRONUNCIATION_CONFIG', '')
        # Skip blank entries (e.g. a trailing ':'): Path('') resolves to '.',
        # which exists and would only produce a load error later.
        candidates.extend(
            Path(entry).expanduser()
            for entry in env_paths.split(':')
            if entry.strip()
        )

        return [path for path in candidates if path.exists()]

    def _load_all_rules(self):
        """Load rules from all configured paths, replacing current rules."""
        self.rules = {'tts': [], 'stt': []}

        for config_path in self.config_paths:
            try:
                self._load_rules_from_file(config_path)
            except Exception as e:
                # Best effort: one unreadable file must not block the others.
                logger.error(f"Failed to load rules from {config_path}: {e}")
            else:
                logger.info(f"Loaded pronunciation rules from {config_path}")

    def _load_rules_from_file(self, config_path: Path):
        """Load rules from a single YAML file.

        Raises:
            ValueError: If the file's top level is not a mapping.
            OSError, yaml.YAMLError: Propagated from reading/parsing.
        """
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)

        if not config:
            return
        if not isinstance(config, dict):
            raise ValueError("top-level YAML value must be a mapping")

        self._merge_rules('tts', config.get('tts_rules'))
        self._merge_rules('stt', config.get('stt_rules'))

    def _merge_rules(self, direction: str, rule_dicts) -> None:
        """Merge raw rule dicts into ``self.rules[direction]`` and re-sort.

        ``rule_dicts`` may be None: a YAML key present with no value parses
        to None, which is treated as an empty list. A rule whose name already
        exists replaces the existing rule.
        """
        merged = list(self.rules[direction])
        for rule_dict in rule_dicts or []:
            rule = self._dict_to_rule(rule_dict)
            if rule is None:
                continue
            merged = [r for r in merged if r.name != rule.name]
            merged.append(rule)

        merged.sort(key=lambda r: r.order)
        self.rules[direction] = merged

    def _dict_to_rule(self, rule_dict: dict) -> "Optional[PronounceRule]":
        """Convert a dictionary to a PronounceRule.

        Returns None (after logging) when a required key is missing or the
        entry has the wrong type.
        """
        try:
            return PronounceRule(
                name=rule_dict['name'],
                pattern=rule_dict['pattern'],
                replacement=rule_dict['replacement'],
                order=rule_dict.get('order', 100),
                enabled=rule_dict.get('enabled', True),
                description=rule_dict.get('description', ''),
                private=rule_dict.get('private', True)  # Default to private
            )
        except (KeyError, TypeError) as e:
            logger.error(f"Invalid rule configuration: {e}")
            return None

    def _process(self, direction: str, text: str) -> str:
        """Apply every rule of ``direction`` to ``text`` in order."""
        log_substitutions = os.environ.get(
            'VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS', ''
        ).lower() == 'true'
        label = direction.upper()

        for rule in self.rules[direction]:
            before = text
            text, applied = rule.apply(text)
            if applied and log_substitutions:
                logger.info(f"Pronunciation {label}: Applied rule '{rule.name}': \"{before}\" → \"{text}\"")

        return text

    def process_tts(self, text: str) -> str:
        """
        Apply TTS substitutions before speech generation.

        Args:
            text: Text to be spoken by TTS

        Returns:
            Modified text with pronunciation improvements
        """
        return self._process('tts', text)

    def process_stt(self, text: str) -> str:
        """
        Apply STT corrections after transcription.

        Args:
            text: Text transcribed from speech

        Returns:
            Corrected text
        """
        return self._process('stt', text)

    def _next_auto_name(self, direction: str) -> str:
        """Return an unused ``"{direction}_rule_{n}"`` name.

        Counting starts at the current rule count and advances past names
        already taken, so a name freed by an earlier removal cannot make the
        generated name collide with a surviving rule.
        """
        taken = [r.name for r in self.rules[direction]]
        index = len(taken)
        candidate = f"{direction}_rule_{index}"
        while candidate in taken:
            index += 1
            candidate = f"{direction}_rule_{index}"
        return candidate

    # CRUD Operations
    def add_rule(self, direction: str, pattern: str, replacement: str,
                 name: Optional[str] = None, description: str = "",
                 enabled: bool = True, order: int = 100,
                 private: bool = False) -> bool:
        """
        Add a new pronunciation rule.

        Args:
            direction: 'tts' or 'stt'
            pattern: Regex pattern to match
            replacement: Replacement text
            name: Rule name (auto-generated if not provided)
            description: Human-readable description
            enabled: Whether rule is active
            order: Processing order
            private: Whether rule is hidden from LLM

        Returns:
            True if rule was added successfully; False for an invalid
            direction, a duplicate name, or a pattern that does not compile.
        """
        if direction not in self._DIRECTIONS:
            logger.error(f"Invalid direction: {direction}")
            return False

        # Auto-generate name if not provided
        if not name:
            name = self._next_auto_name(direction)

        # Check for duplicate names
        if any(r.name == name for r in self.rules[direction]):
            logger.error(f"Rule with name '{name}' already exists")
            return False

        rule = PronounceRule(
            name=name,
            pattern=pattern,
            replacement=replacement,
            order=order,
            enabled=enabled,
            description=description,
            private=private
        )

        # PronounceRule logs the regex error itself; just reject the rule.
        if not rule._compiled:
            return False

        self.rules[direction].append(rule)
        self.rules[direction].sort(key=lambda r: r.order)

        # Save to user config
        self._save_user_rules()
        return True

    def remove_rule(self, direction: str, name: str) -> bool:
        """Remove a pronunciation rule by name.

        Returns:
            True if a rule was removed (and the user config rewritten).
        """
        if direction not in self._DIRECTIONS:
            return False

        original_count = len(self.rules[direction])
        self.rules[direction] = [r for r in self.rules[direction] if r.name != name]

        if len(self.rules[direction]) < original_count:
            self._save_user_rules()
            return True
        return False

    def list_rules(self, direction: Optional[str] = None,
                   include_private: bool = False) -> List[dict]:
        """
        List all rules or rules for specific direction.

        Args:
            direction: 'tts', 'stt', or None for all
            include_private: Whether to include private rules (for CLI, not MCP)

        Returns:
            List of rule dictionaries; empty for an unknown direction.
        """
        selected = [direction] if direction else list(self._DIRECTIONS)

        return [
            {
                'direction': current,
                'name': rule.name,
                'pattern': rule.pattern,
                'replacement': rule.replacement,
                'order': rule.order,
                'enabled': rule.enabled,
                'description': rule.description,
                'private': rule.private
            }
            for current in selected
            if current in self.rules
            for rule in self.rules[current]
            # Skip private rules unless explicitly requested
            if include_private or not rule.private
        ]

    def _set_enabled(self, direction: str, name: str, enabled: bool) -> bool:
        """Set the ``enabled`` flag of a non-private rule and persist it."""
        if direction not in self._DIRECTIONS:
            return False

        action = "enable" if enabled else "disable"
        for rule in self.rules[direction]:
            if rule.name != name:
                continue
            if rule.private:
                logger.warning(f"Cannot {action} private rule '{name}' via API")
                return False
            rule.enabled = enabled
            self._save_user_rules()
            return True
        return False

    def enable_rule(self, direction: str, name: str) -> bool:
        """Enable a specific rule. Private rules are refused."""
        return self._set_enabled(direction, name, True)

    def disable_rule(self, direction: str, name: str) -> bool:
        """Disable a specific rule. Private rules are refused."""
        return self._set_enabled(direction, name, False)

    def test_rule(self, text: str, direction: str = "tts") -> str:
        """Test what a text would become after applying rules.

        An unknown ``direction`` returns ``text`` unchanged.
        """
        if direction == 'tts':
            return self.process_tts(text)
        if direction == 'stt':
            return self.process_stt(text)
        return text

    def reload_rules(self):
        """Reload all rules from configuration files.

        NOTE(review): ``self.config_paths`` is computed once in ``__init__``
        and only contains files that existed then, so a user config first
        created by a later save is not re-read here.
        """
        self._load_all_rules()
        logger.info("Reloaded pronunciation rules")

    @staticmethod
    def _rule_to_dict(rule) -> dict:
        """Serialise one rule to the mapping written to the YAML config."""
        return {
            'name': rule.name,
            'order': rule.order,
            'pattern': rule.pattern,
            'replacement': rule.replacement,
            'enabled': rule.enabled,
            'description': rule.description,
            'private': rule.private
        }

    def _save_user_rules(self):
        """Save current rules to user config file.

        NOTE(review): every in-memory rule is written, including rules that
        were loaded from the packaged defaults or other config files.
        """
        user_config = self._user_config_path()
        user_config.parent.mkdir(parents=True, exist_ok=True)

        config = {
            'version': 1,
            'tts_rules': [self._rule_to_dict(r) for r in self.rules['tts']],
            'stt_rules': [self._rule_to_dict(r) for r in self.rules['stt']]
        }

        with open(user_config, 'w', encoding='utf-8') as f:
            yaml.safe_dump(config, f, default_flow_style=False, sort_keys=False)

        logger.info(f"Saved pronunciation rules to {user_config}")
381
+
382
+
383
+ # Global instance (lazy loaded)
384
+ _manager: Optional[PronounceManager] = None
385
+
386
+
387
def get_manager() -> PronounceManager:
    """Return the shared PronounceManager, building it on first use."""
    global _manager
    if _manager is not None:
        return _manager

    # First call: construct the manager and cache it in the module global.
    _manager = PronounceManager()
    return _manager
393
+
394
+
395
def is_enabled() -> bool:
    """Report whether the pronunciation middleware is switched on.

    Reads ``VOICEMODE_PRONUNCIATION_ENABLED``; the middleware counts as
    enabled when the variable is unset or equals ``true`` (any letter case).
    """
    setting = os.environ.get('VOICEMODE_PRONUNCIATION_ENABLED', 'true')
    return setting.lower() == 'true'
voice_mode/providers.py CHANGED
@@ -9,9 +9,8 @@ import logging
9
9
  from typing import Dict, Optional, List, Any, Tuple
10
10
  from openai import AsyncOpenAI
11
11
 
12
- from .config import TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, OPENAI_API_KEY
12
+ from .config import TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, OPENAI_API_KEY, get_voice_preferences
13
13
  from .provider_discovery import provider_registry, EndpointInfo
14
- from .voice_preferences import get_preferred_voices
15
14
 
16
15
  logger = logging.getLogger("voice-mode")
17
16
 
@@ -68,14 +67,14 @@ async def get_tts_client_and_voice(
68
67
  return client, selected_voice, selected_model, endpoint_info
69
68
 
70
69
  # Voice-first selection algorithm
71
- # Get user preferences and prepend to system defaults
72
- user_preferences = get_preferred_voices()
73
- combined_voice_list = user_preferences + [v for v in TTS_VOICES if v not in user_preferences]
70
+ # Get user preferences from configuration
71
+ voice_preferences = get_voice_preferences()
72
+ combined_voice_list = voice_preferences
74
73
 
75
74
  logger.info(f"TTS Provider Selection (voice-first)")
76
- if user_preferences:
77
- logger.info(f" User voice preferences: {user_preferences}")
78
- logger.info(f" Combined voice list: {combined_voice_list}")
75
+ if voice_preferences:
76
+ logger.info(f" Voice preferences: {voice_preferences}")
77
+ logger.info(f" Voice list: {combined_voice_list}")
79
78
  logger.info(f" Preferred models: {TTS_MODELS}")
80
79
  logger.info(f" Available endpoints: {TTS_BASE_URLS}")
81
80
 
@@ -267,7 +267,7 @@ async def environment_variables() -> str:
267
267
  ("VOICEMODE_AUTO_START_KOKORO", "Auto-start Kokoro service (true/false)"),
268
268
  ("VOICEMODE_TTS_BASE_URLS", "Comma-separated list of TTS endpoints"),
269
269
  ("VOICEMODE_STT_BASE_URLS", "Comma-separated list of STT endpoints"),
270
- ("VOICEMODE_TTS_VOICES", "Comma-separated list of preferred voices"),
270
+ ("VOICEMODE_VOICES", "Comma-separated list of preferred voices"),
271
271
  ("VOICEMODE_TTS_MODELS", "Comma-separated list of preferred models"),
272
272
  # Audio Settings
273
273
  ("VOICEMODE_AUDIO_FORMAT", "Audio format for recording (pcm/mp3/wav/flac/aac/opus)"),
@@ -358,7 +358,7 @@ async def environment_template() -> str:
358
358
  f"export VOICEMODE_AUTO_START_KOKORO=\"{str(AUTO_START_KOKORO).lower()}\"",
359
359
  f"export VOICEMODE_TTS_BASE_URLS=\"{','.join(TTS_BASE_URLS)}\"",
360
360
  f"export VOICEMODE_STT_BASE_URLS=\"{','.join(STT_BASE_URLS)}\"",
361
- f"export VOICEMODE_TTS_VOICES=\"{','.join(TTS_VOICES)}\"",
361
+ f"export VOICEMODE_VOICES=\"{','.join(TTS_VOICES)}\"",
362
362
  f"export VOICEMODE_TTS_MODELS=\"{','.join(TTS_MODELS)}\"",
363
363
  "",
364
364
  "# Audio Settings",